From 7c348473ec99651c9c1c36c3ce653140664bbac8 Mon Sep 17 00:00:00 2001 From: Ashwin Agrawal Date: Mon, 29 Nov 2021 09:30:46 -0800 Subject: [PATCH 01/46] Fix shared snapshot collision FATAL "writer segworker group shared snapshot collision" happens when gp_vmem_idle_time reached, the QD will clean the idle writer and reader gang and close the connection to the QE, QE will quit in an async way. QD processes remain. If QE cannot quit before QD starts a new command, it will find the same session id in the shared snapshot and collision will happen. QE session quit may take time due to ProcArrayLock contention. Hence, this commit only cleans up reader gangs and not writer gang during idle cleanup session timeout. This way no need to remove and readd shared snapshot slot on QEs and hence avoids the collision possibility. (cherry picked from commit cc58ac6afec2587ae7afb489f59fc7c1d1949325) --- src/backend/cdb/dispatcher/cdbgang.c | 45 +++++++++---------------- src/backend/utils/time/sharedsnapshot.c | 7 +++- 2 files changed, 21 insertions(+), 31 deletions(-) diff --git a/src/backend/cdb/dispatcher/cdbgang.c b/src/backend/cdb/dispatcher/cdbgang.c index 41f1082a3e8..966c293ad7c 100644 --- a/src/backend/cdb/dispatcher/cdbgang.c +++ b/src/backend/cdb/dispatcher/cdbgang.c @@ -720,40 +720,25 @@ DisconnectAndDestroyAllGangs(bool resetSession) * Destroy all idle (i.e available) QEs. * It is always safe to get rid of the reader QEs. * - * If we are not in a transaction and we do not have a TempNamespace, destroy - * writer QEs as well. - * * call only from an idle session. */ void DisconnectAndDestroyUnusedQEs(void) { - if (IsTransactionOrTransactionBlock() || TempNamespaceOidIsValid()) - { - /* - * If we are in a transaction, we can't release the writer gang, - * as this will abort the transaction. - * - * If we have a TempNameSpace, we can't release the writer gang, as this - * would drop any temp tables we own. 
- * - * Since we are idle, any reader gangs will be available but not allocated. - */ - cdbcomponent_cleanupIdleQEs(false); - } - else - { - /* - * Get rid of ALL gangs... Readers and primary writer. - * After this, we have no resources being consumed on the segDBs at all. - * - * Our session wasn't destroyed due to an fatal error or FTS action, so - * we don't need to do anything special. Specifically, we DON'T want - * to act like we are now in a new session, since that would be confusing - * in the log. - * - */ - cdbcomponent_cleanupIdleQEs(true); - } + /* + * Only release reader gangs, never writer gang. This helps to avoid the + * shared snapshot collision error on next gang creation from hitting if + * QE processes are slow to exit due to this cleanup. + * + * If we are in a transaction, we can't release the writer gang also, as + * this will abort the transaction. + * + * If we have a TempNameSpace, we can't release the writer gang also, as + * this would drop any temp tables we own. + * + * Since we are idle, any reader gangs will be available but not + * allocated. + */ + cdbcomponent_cleanupIdleQEs(false); } /* diff --git a/src/backend/utils/time/sharedsnapshot.c b/src/backend/utils/time/sharedsnapshot.c index 851771dab7e..2f806edb4e6 100644 --- a/src/backend/utils/time/sharedsnapshot.c +++ b/src/backend/utils/time/sharedsnapshot.c @@ -379,7 +379,10 @@ SharedSnapshotAdd(int32 slotId) SharedSnapshotSlot *testSlot = &arrayP->slots[i]; if (testSlot->slotindex > arrayP->maxSlots) + { + LWLockRelease(SharedSnapshotLock); elog(ERROR, "Shared Local Snapshots Array appears corrupted: %s", SharedSnapshotDump()); + } if (testSlot->slotid == slotId) { @@ -403,8 +406,10 @@ SharedSnapshotAdd(int32 slotId) } else { + char *slot_dump = SharedSnapshotDump(); + LWLockRelease(SharedSnapshotLock); elog(ERROR, "writer segworker group shared snapshot collision on id %d. 
Slot array dump: %s", - slotId, SharedSnapshotDump()); + slotId, slot_dump); } } From 472ee4c59cd924aec8d00f5f30131dc9fec7acda Mon Sep 17 00:00:00 2001 From: Adam Lee Date: Mon, 7 Mar 2022 09:03:35 +0000 Subject: [PATCH 02/46] Backport PARALLEL RETRIEVE CURSOR changes These changes are back ported from 6X_STABLE branch, other than refining code and words, the names of UDFs are changed: ``` pg_catalog.gp_endpoints() -> pg_catalog.gp_get_endpoints() pg_catalog.gp_segment_endpoints() -> pg_catalog.gp_get_segment_endpoints() pg_catalog.gp_session_endpoints() -> pg_catalog.gp_get_session_endpoints() ``` And views are created for convenience: ``` CREATE VIEW pg_catalog.gp_endpoints AS SELECT * FROM pg_catalog.gp_get_endpoints(); CREATE VIEW pg_catalog.gp_segment_endpoints AS SELECT * FROM pg_catalog.gp_get_segment_endpoints(); CREATE VIEW pg_catalog.gp_session_endpoints AS SELECT * FROM pg_catalog.gp_get_session_endpoints(); ``` Co-Authored-By: Jian Guo Co-Authored-By: Xuejing Zhao --- doc/src/sgml/ref/allfiles.sgml | 1 + doc/src/sgml/ref/declare.sgml | 28 +- doc/src/sgml/ref/retrieve.sgml | 89 ++++++ src/backend/catalog/system_views.sql | 21 +- src/backend/cdb/dispatcher/cdbdisp_async.c | 42 ++- src/backend/cdb/endpoint/README | 58 ++-- src/backend/cdb/endpoint/cdbendpoint.c | 302 ++++++++---------- .../cdb/endpoint/cdbendpoint_private.h | 14 +- .../cdb/endpoint/cdbendpointretrieve.c | 184 ++++++----- src/backend/cdb/endpoint/cdbendpointutils.c | 142 +++++--- src/backend/commands/async.c | 4 + src/backend/commands/sequence.c | 9 +- src/backend/executor/execMain.c | 77 +++-- src/backend/optimizer/util/pathnode.c | 13 +- src/backend/storage/ipc/shm_toc.c | 1 + src/backend/tcop/postgres.c | 18 +- src/backend/utils/init/postinit.c | 1 + src/include/catalog/pg_proc.dat | 4 +- src/include/cdb/cdbdisp.h | 9 +- src/include/cdb/cdbendpoint.h | 47 ++- src/include/commands/async.h | 1 + .../test_parallel_retrieve_cursor_nowait.c | 6 +- .../test_parallel_retrieve_cursor_wait.c | 6 
+- src/test/isolation2/.gitignore | 2 + src/test/isolation2/Makefile | 2 +- .../init_file_parallel_retrieve_cursor | 4 + .../parallel_retrieve_cursor/corner.source | 148 ++++----- .../fault_inject.source | 60 ++-- .../parallel_retrieve_cursor/privilege.source | 32 +- .../replicated_table.source | 22 +- .../retrieve_quit_check.source | 18 +- .../retrieve_quit_wait.source | 24 +- .../parallel_retrieve_cursor/security.source | 2 +- .../special_query.source | 6 +- .../status_check.source | 110 +++---- .../status_wait.source | 153 ++++++--- .../parallel_retrieve_cursor/corner.source | 148 ++++----- .../fault_inject.source | 90 +++--- .../parallel_retrieve_cursor/privilege.source | 78 ++--- .../replicated_table.source | 25 +- .../retrieve_quit_check.source | 18 +- .../retrieve_quit_wait.source | 24 +- .../parallel_retrieve_cursor/security.source | 8 +- .../special_query.source | 6 +- .../status_check.source | 144 ++++----- .../status_wait.source | 281 ++++++++++++---- ..._parallel_retrieve_cursor_extended_query.c | 4 +- ...lel_retrieve_cursor_extended_query_error.c | 4 +- 48 files changed, 1453 insertions(+), 1037 deletions(-) create mode 100644 doc/src/sgml/ref/retrieve.sgml diff --git a/doc/src/sgml/ref/allfiles.sgml b/doc/src/sgml/ref/allfiles.sgml index 8063efae616..2b69b954af4 100644 --- a/doc/src/sgml/ref/allfiles.sgml +++ b/doc/src/sgml/ref/allfiles.sgml @@ -170,6 +170,7 @@ Complete list of usable sgml source files in this directory. 
+ diff --git a/doc/src/sgml/ref/declare.sgml b/doc/src/sgml/ref/declare.sgml index bbbd335bd0b..3950d256e53 100644 --- a/doc/src/sgml/ref/declare.sgml +++ b/doc/src/sgml/ref/declare.sgml @@ -21,13 +21,13 @@ PostgreSQL documentation DECLARE - define a cursor + define a cursor or a parallel retrieve cursor DECLARE name [ BINARY ] [ ASENSITIVE | INSENSITIVE ] [ [ NO ] SCROLL ] - CURSOR [ { WITH | WITHOUT } HOLD ] FOR query + [ PARALLEL RETRIEVE ] CURSOR [ { WITH | WITHOUT } HOLD ] FOR query @@ -42,6 +42,22 @@ DECLARE name [ BINARY ] [ ASENSITIV FETCH. + + Like a normal cursor, user can declare a parallel retrieve cursor on + coordinator, then retrieve the query results on each segment directly. + + + + Parallel retrieve cursor has similar declaration and syntax as normal cursor + does. However, some cursor operations are not supported in parallel retrieve + cursor(e.g. MOVE). + + + + NOTE: Orca doesn't support PARALLEL RETRIEVE CURSOR for now. It would fall + back to postgres optimizer automatically. + + This page describes usage of cursors at the SQL command level. @@ -113,6 +129,10 @@ DECLARE name [ BINARY ] [ ASENSITIV SCROLL. See below for details. + + + PARALLEL RETRIEVE CURSOR with WITH SCROLL is not supported. + @@ -128,6 +148,10 @@ DECLARE name [ BINARY ] [ ASENSITIV WITH HOLD is specified, WITHOUT HOLD is the default. + + + PARALLEL RETRIEVE CURSOR with WITH HOLD is not supported. + diff --git a/doc/src/sgml/ref/retrieve.sgml b/doc/src/sgml/ref/retrieve.sgml new file mode 100644 index 00000000000..470d318d391 --- /dev/null +++ b/doc/src/sgml/ref/retrieve.sgml @@ -0,0 +1,89 @@ + + + + + RETRIEVE + + + + RETRIEVE + 7 + SQL - Language Statements + + + + RETRIEVE + retrieve from endpoint + + + + +RETRIEVE { ALL | count } FROM ENDPOINT endpoint_name; + + + + + + Description + + + RETRIEVE retrieves data from endpoint. 
+ + + + In each retrieve session, the query result on that segment can be + retrieved by using statement "RETRIEVE" and its corresponding endpoint + name. + + + + An empty set will be returned if no more + tuples for the endpoint. + + + + + Parameters + + + + count + + + a positive integer value needs to be provided as the "count" to + specify how many rows to retrieve. + + + + + + ALL + + + Parameter "ALL" means to retrieve all the results from the endpoint. + + + + + + endpoint_name + + + The endpoint to retrieve data from, endpoint names can be listed from function gp_get_endpoints(). + + + + + + + + See Also + + + + + + diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 62993d70c8e..48e6068c0f0 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1527,18 +1527,27 @@ $$ $$ LANGUAGE SQL READS SQL DATA EXECUTE ON COORDINATOR; -CREATE FUNCTION gp_session_endpoints (OUT gp_segment_id int, OUT auth_token text, - OUT cursorname text, OUT sessionid int, OUT hostname text, - OUT port int, OUT userid oid, OUT state text, +CREATE FUNCTION gp_get_session_endpoints (OUT gp_segment_id int, OUT auth_token text, + OUT cursorname text, OUT sessionid int, OUT hostname varchar(64), + OUT port int, OUT username text, OUT state text, OUT endpointname text) RETURNS SETOF RECORD AS $$ - SELECT * FROM gp_endpoints() + SELECT * FROM pg_catalog.gp_get_endpoints() WHERE sessionid = (SELECT setting FROM pg_settings WHERE name = 'gp_session_id')::int4 $$ LANGUAGE SQL EXECUTE ON COORDINATOR; -COMMENT ON FUNCTION pg_catalog.gp_session_endpoints() IS 'All endpoints in this session that are visible to the current user.'; +COMMENT ON FUNCTION pg_catalog.gp_get_session_endpoints() IS 'All endpoints in this session that are visible to the current user.'; + +CREATE VIEW pg_catalog.gp_endpoints AS + SELECT * FROM pg_catalog.gp_get_endpoints(); + +CREATE VIEW pg_catalog.gp_segment_endpoints AS + SELECT * FROM 
pg_catalog.gp_get_segment_endpoints(); + +CREATE VIEW pg_catalog.gp_session_endpoints AS + SELECT * FROM pg_catalog.gp_get_session_endpoints(); CREATE VIEW pg_stat_bgwriter AS SELECT @@ -1747,4 +1756,4 @@ REVOKE ALL ON pg_replication_origin_status FROM public; REVOKE ALL ON pg_subscription FROM public; GRANT SELECT (oid, subdbid, subname, subowner, subenabled, subbinary, substream, subslotname, subsynccommit, subpublications) - ON pg_subscription TO public; + ON pg_subscription TO public; \ No newline at end of file diff --git a/src/backend/cdb/dispatcher/cdbdisp_async.c b/src/backend/cdb/dispatcher/cdbdisp_async.c index 847410a46e1..8f5ba72928b 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_async.c +++ b/src/backend/cdb/dispatcher/cdbdisp_async.c @@ -44,7 +44,7 @@ #include "commands/sequence.h" #include "access/xact.h" #include "utils/timestamp.h" -#define DISPATCH_WAIT_TIMEOUT_MSEC 1000 +#define DISPATCH_WAIT_TIMEOUT_MSEC 2000 /* * Ideally, we should set timeout to zero to cancel QEs as soon as possible, @@ -53,6 +53,13 @@ */ #define DISPATCH_WAIT_CANCEL_TIMEOUT_MSEC 100 +/* + * DISPATCH_NO_WAIT means return immediate when there's no more data, + * DISPATCH_WAIT_UNTIL_FINISH means wait until all dispatch works are completed. + */ +#define DISPATCH_NO_WAIT 0 +#define DISPATCH_WAIT_UNTIL_FINISH -1 + typedef struct CdbDispatchCmdAsync { @@ -74,9 +81,9 @@ typedef struct CdbDispatchCmdAsync volatile DispatchWaitMode waitMode; /* - * When waitMode is set to DISPATCH_WAIT_ACK_ROOT/DISPATCH_WAIT_ACK_ALL, - * the expected acknowledge message from QE should be specified. This field - * stores the expected acknowledge message. + * When waitMode is set to DISPATCH_WAIT_ACK_ROOT, + * the expected acknowledge message from QE should be specified. + * This field stores the expected acknowledge message. 
*/ const char *ackMessage; @@ -155,7 +162,7 @@ cdbdisp_checkForCancel_async(struct CdbDispatcherState *ds) { Assert(ds); - checkDispatchResult(ds, 0); + checkDispatchResult(ds, DISPATCH_NO_WAIT); return cdbdisp_checkResultsErrcode(ds->primaryResults); } @@ -402,7 +409,7 @@ cdbdisp_checkDispatchResult_async(struct CdbDispatcherState *ds, if (waitMode != DISPATCH_WAIT_NONE) pParms->waitMode = waitMode; - checkDispatchResult(ds, -1); + checkDispatchResult(ds, DISPATCH_WAIT_UNTIL_FINISH); /* * It looks like everything went fine, make sure we don't miss a user @@ -441,6 +448,9 @@ cdbdisp_makeDispatchParams_async(int maxSlices, int largestGangSize, char *query /* * Receive and process results from all running QEs. + * timeout_sec: the second that the dispatcher waits for the ack messages at most. + * DISPATCH_NO_WAIT(0): return immediate when there's no more data. + * DISPATCH_WAIT_UNTIL_FINISH(-1): wait until all dispatch works are completed. * * Don't throw out error, instead, append the error message to * CdbDispatchResult.error_message. 
@@ -462,6 +472,7 @@ checkDispatchResult(CdbDispatcherState *ds, int timeout_sec) uint8 ftsVersion = 0; #endif int64 diff_us; + bool cancelRequested = false; db_count = pParms->dispatchCount; fds = (struct pollfd *) palloc(db_count * sizeof(struct pollfd)); @@ -486,12 +497,22 @@ checkDispatchResult(CdbDispatcherState *ds, int timeout_sec) if (proc_exit_inprogress) break; + PG_TRY(); + { + CHECK_FOR_INTERRUPTS(); + } + PG_CATCH(); + { + cancelRequested = true; + } + PG_END_TRY(); + /* - * escalate waitMode to cancel if: - user interrupt has occurred, - or - * an error has been reported by any QE, - in case the caller wants + * escalate waitMode to cancel if: - user cancel request has occurred, + * - or an error has been reported by any QE, - in case the caller wants * cancelOnError */ - if ((InterruptPending || meleeResults->errcode) && meleeResults->cancelOnError) + if ((cancelRequested || meleeResults->errcode) && meleeResults->cancelOnError) pParms->waitMode = DISPATCH_WAIT_CANCEL; /* @@ -650,6 +671,9 @@ checkDispatchResult(CdbDispatcherState *ds, int timeout_sec) } pfree(fds); + + if (cancelRequested) + PG_RE_THROW(); } /* diff --git a/src/backend/cdb/endpoint/README b/src/backend/cdb/endpoint/README index 7c612a740ac..eb5db056958 100644 --- a/src/backend/cdb/endpoint/README +++ b/src/backend/cdb/endpoint/README @@ -1,6 +1,6 @@ src/backend/cdb/endpoint/README -With the size of greenplum cluster increasing, the performance bottleneck on +With the size of Greenplum cluster increasing, the performance bottleneck on the coordinator node becomes more and more obvious. The parallel retrieve cursor feature is designed to reduce the heavy burdens of @@ -20,14 +20,15 @@ Parallel retrieve cursor has similar declaration and syntax as normal cursor does. However, some cursor operations are not supported in parallel retrieve cursor(e.g. MOVE). -#NOTE: Orca doesn't support PARALLEL RETRIEVE CURSOR for now. It would fall -back to postgres optimizer automatically. 
+#NOTE: Orca doesn't support PARALLEL RETRIEVE CURSOR for now. Greenplum would +fall back to postgres optimizer automatically. Endpoint ======== Once a parallel retrieve cursor has been declared on QD, a corresponding -endpoint will be created on each QE which contains the query result. Then, +endpoint will be created on the same segment whose QE contains the query +result. These endpoints are backed by individual backends on each QE. Then, those endpoints can be used as the source, and results can be retrieved from them in parallel on each QE. @@ -56,26 +57,26 @@ List Parallel Retrieve Cursors and their endpoints To retrieve the query results of a parallel retrieve cursor, the related endpoint information is needed before start a retrieve session on QEs. The UDF -gp_endpoints() can be used to list parallel retrieve cursors and their +gp_get_endpoints() can be used to list parallel retrieve cursors and their endpoints information. This UDF could be run on the coordinator only. For a superuser, it can list all endpoints information of all users', but for non-superuser, it can only list the current user's endpoints information for security reason. 
-Definition: gp_endpoints() +Definition: gp_get_endpoints() -gp_endpoints Columns: +gp_get_endpoints() Columns: |-------------+-----------+------------------------------------------| | Column Name | Data Type | Description | |-------------+-----------+------------------------------------------| | dbid | integer | The QE's dbid | |-------------+-----------+------------------------------------------| -| auth_token | text | Retrieve session authentication token | +| auth_token | text | Retrieve-session authentication token | |-------------+-----------+------------------------------------------| | cursorname | text | Parallel retrieve cursor name | |-------------+-----------+------------------------------------------| -| sessionid | integer | The session where the cursor created in | +| sessionid | integer | The session where the cursor was created | |-------------+-----------+------------------------------------------| | hostname | text | The host to retrieve from | |-------------+-----------+------------------------------------------| @@ -104,7 +105,7 @@ gp_endpoints Columns: Examples: -postgres=# select * from gp_endpoints(); +postgres=# select * from gp_get_endpoints(); dbid | auth_token | cursorname | sessionid | hostname | port | userid | state | endpointname ------+----------------------------------+------------+-----------+----------+------+--------+-------+-------------------- 2 | 75ebe7b49c3e09f35e017fc0181c62cf | c3 | 105 | host67 | 7002 | 10 | READY | c30000006900000005 @@ -113,13 +114,14 @@ postgres=# select * from gp_endpoints(); (3 rows) The userid of the endpoint is the session user, not the current user. For -example, if login the database with user1, then set role to another user -user2 and declare a parallel retrieve cursor. The userid of these endpoints -is user1's oid, not user2's oid. The session user should be used to start a -retrieve connection. This is typically due to the security concern, e.g. 
if -user2 is nologin, we surely do not expect to retrieive using user2. - -There is another similar gp_session_endpoints() that shows the endpoint +example, if a user logs in to the database as user1, then uses set role to +switch to another user (e.g., user2) and declare a parallel retrieve cursor. +The userid of these endpoints is user1's oid, not user2's oid. The session user +(i.e., user1) should be used to start a retrieve connection. This is typically +due to the security concern, e.g. if user2 is nologin, we should not be able to +retrieve using user2. + +There is another similar gp_get_session_endpoints() that shows the endpoint informations that belong to this session only. Start A Retrieve Session @@ -180,13 +182,13 @@ List Endpoints In Utility Session On Endpoint QE It is possible to list all sessions' endpoints in the UTILITY connection to specific endpoint (coordinator or segment node) by using UDF -gp_segment_endpoints(). Same as the UDF gp_endpoints(), a superuser can see the +gp_get_segment_endpoints(). Same as the UDF gp_get_endpoints(), a superuser can see the endpoint information of all users, but non-superuser can see its endpoints information only for security reason. 
-Definition: gp_segment_endpoints() +Definition: gp_get_segment_endpoints() -gp_segment_endpoints Columns: +gp_get_segment_endpoints() Columns: |--------------+-----------+------------------------------------------| | Column Name | Data Type | Description | |--------------+-----------+------------------------------------------| @@ -201,7 +203,7 @@ gp_segment_endpoints Columns: | | | received on | |--------------+-----------+------------------------------------------| | state | text | The state of the endpoint | -| | | See gp_endpoints() for more details | +| | | See gp_get_endpoints() for more details | |--------------+-----------+------------------------------------------| | dbid | integer | The QE's dbid | |--------------+-----------+------------------------------------------| @@ -219,7 +221,7 @@ Examples: # Connect the segment in utility mode $> PGOPTIONS="-c gp_role=utility" psql -h host67 -p 7002 -d postgres -postgres=# select * from gp_segment_endpoints(); +postgres=# select * from gp_get_segment_endpoints(); auth_token | databaseid | senderpid | receiverpid | state | dbid | sessionid | userid | endpointname | cursorname ----------------------------------+------------+-----------+-------------+-------+------+-----------+--------+--------------------+------------ 75ebe7b49c3e09f35e017fc0181c62cf | 13361 | 3854 | -1 | READY | 2 | 105 | 10 | c30000006900000005 | c3 @@ -228,7 +230,7 @@ postgres=# select * from gp_segment_endpoints(); Wait Parallel Retrieve Cursor To Be Fully Retrieved =================================================== -UDF gp_wait_parallel_retrieve_cursor(text) is designed to block until all the +UDF gp_wait_parallel_retrieve_cursor() is designed to block until all the endpoints have been fully retrieved for the given parallel retrieve cursor until timeout happens. It will block the coordinator session until all the relevant endpoints are fully retrieved unless timeout or error happens. When @@ -246,7 +248,7 @@ an error message will be thrown. 
Examples: -postgres=# SELECT gp_wait_parallel_retrieve_cursor('c3'); +postgres=# SELECT gp_wait_parallel_retrieve_cursor('c3', -1); gp_wait_parallel_retrieve_cursor ----------------------------------- t @@ -315,7 +317,7 @@ DECLARE -- List endpoints to get the needed information to start retrieving -- sessions on segments -postgres=# SELECT * FROM gp_endpoints(); +postgres=# SELECT * FROM gp_get_endpoints(); dbid | auth_token | cursorname | sessionid | hostname | port | userid | state | endpointname ------+----------------------------------+------------+-----------+----------+------+--------+-------+-------------------- 2 | c5c116a13e2fdb8b436cdbc8e1bc7365 | c1 | 22 | host67 | 7002 | 10 | READY | c1000000160000000a @@ -342,7 +344,7 @@ Now the state of endpoint c1000000160000000a for dbid 2 (host67:7002) should become "FINISHED" since all results on the segment have been retrieved. -- List endpoints on coordinator to check -postgres=# SELECT * FROM gp_endpoints(); +postgres=# SELECT * FROM gp_get_endpoints(); dbid | auth_token | cursorname | sessionid | hostname | port | userid | state | endpointname ------+----------------------------------+------------+-----------+----------+------+--------+----------+-------------------- 2 | c5c116a13e2fdb8b436cdbc8e1bc7365 | c1 | 22 | host67 | 7002 | 10 | FINISHED | c1000000160000000a @@ -370,7 +372,7 @@ has been attached by a receiver. If the receiver has retrieved all the data from the endpoint, the state becomes FINISHED. 
-- List endpoints on coordinator to check -postgres=# SELECT * FROM gp_endpoints(); +postgres=# SELECT * FROM gp_get_endpoints(); dbid | auth_token | cursorname | sessionid | hostname | port | userid | state | endpointname ------+----------------------------------+------------+-----------+----------+------+--------+----------+-------------------- 2 | c5c116a13e2fdb8b436cdbc8e1bc7365 | c1 | 22 | host67 | 7002 | 10 | FINISHED | c1000000160000000a @@ -392,7 +394,7 @@ postgres=# CLOSE c1; CLOSE CURSOR -- All endpoints are gone -postgres=# SELECT * FROM gp_endpoints(); +postgres=# SELECT * FROM gp_get_endpoints(); dbid | auth_token | cursorname | sessionid | hostname | port | userid | state | endpointname ------+------------+------------+-----------+----------+------+--------+-------+-------------- (0 rows) diff --git a/src/backend/cdb/endpoint/cdbendpoint.c b/src/backend/cdb/endpoint/cdbendpoint.c index eb00afee2ca..236382e6788 100644 --- a/src/backend/cdb/endpoint/cdbendpoint.c +++ b/src/backend/cdb/endpoint/cdbendpoint.c @@ -5,10 +5,10 @@ * dedicated QE. One parallel retrieve cursor could have multiple endpoints * on different QEs to allow retrieving in parallel. * - * This file implements the sender part of endpoint. + * This file implements the sender part of an endpoint. * - * Endpoint may exist on the coordinator or segments, depends on the query of - * the PARALLEL RETRIEVE CURSOR: + * Endpoints may exist on the coordinator or segments, depending on the query + * of the PARALLEL RETRIEVE CURSOR: * (1) An endpoint is on QD only if the query of the parallel cursor needs to * be finally gathered by the master. e.g.: * > DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM T1 ORDER BY C1; @@ -23,9 +23,9 @@ * will be created first on QEs. An instance of Endpoint struct in the shared * memory represents the endpoint. 
Through the Endpoint, the client could know * the endpoint's identification (endpoint name), location (dbid, host, port - * and session id), and the state for the retrieve session. All of those - * information can be obtained on QD by UDF gp_endpoints() via dispatching - * endpoint queries or on QE's retrieve session by UDF gp_segment_endpoints(). + * and session id), and the state for the retrieve session. All of this + * information can be obtained on QD by UDF gp_get_endpoints() via dispatching + * endpoint queries or on QE's retrieve session by UDF gp_get_segment_endpoints(). * * Instead of returning the query result to QD through a normal dest receiver, * endpoints write the results to TQueueDestReceiver which is a shared memory @@ -34,9 +34,9 @@ * is also stored in the Endpoint so that the retrieve session on the same QE * can know. * - * The token is stored in a different structure SessionInfoEntry to make the - * tokens same for all endpoints in the same session. The token is created on - * each QE after plan get dispatched. + * The token is stored in a different structure EndpointTokenEntry to make the + * tokens same for all backends within the same session under the same postmaster. + * The token is created on each QE after plan get dispatched. * * DECLARE returns only when endpoint and token are ready and query starts * execution. See WaitEndpointsReady(). @@ -64,6 +64,7 @@ #include "access/tupdesc.h" #include "access/xact.h" #include "common/hashfn.h" +#include "commands/async.h" #include "libpq-fe.h" #include "libpq/libpq.h" #include "libpq/pqformat.h" @@ -81,7 +82,7 @@ #include "cdb/cdbsrlz.h" #include "cdb/cdbvars.h" -#define WAIT_ENDPOINT_TIMEOUT 100 +#define WAIT_ENDPOINT_TIMEOUT_MS 100 /* * The size of endpoint tuple queue in bytes. 
@@ -97,32 +98,32 @@ #define DUMMY_CURSOR_NAME "DUMMYCURSORNAME" #endif -static EndpointExecState * CurrEndpointExecState; +static EndpointExecState * CurrentEndpointExecState; -typedef struct SessionTokenTag +typedef struct EndpointTokenTag { int sessionID; Oid userID; -} SessionTokenTag; +} EndpointTokenTag; /* - * sharedSessionInfoHash is located in shared memory on each segment for + * EndpointTokenHash is located in shared memory on each segment for * authentication purpose. */ -typedef struct SessionInfoEntry +typedef struct EndpointTokenEntry { - SessionTokenTag tag; + EndpointTokenTag tag; /* The auth token for this session. */ - int8 token[ENDPOINT_TOKEN_HEX_LEN]; + int8 token[ENDPOINT_TOKEN_ARR_LEN]; /* How many endpoints are referred to this entry. */ uint16 refCount; -} SessionInfoEntry; +} EndpointTokenEntry; /* Shared hash table for session infos */ -static HTAB *sharedSessionInfoHash = NULL; +static HTAB *EndpointTokenHash = NULL; /* Point to Endpoint entries in shared memory */ static struct EndpointData *sharedEndpoints = NULL; @@ -131,24 +132,21 @@ static struct EndpointData *sharedEndpoints = NULL; static void InitSharedEndpoints(void); /* Token utility functions */ -static const int8 *get_or_create_token(void); +static const int8 *create_endpoint_token(void); /* Endpoint helper function */ -static void EndpointNotifyQD(const char *message); -static Endpoint alloc_endpoint(const char *cursorName, dsm_handle dsmHandle); -static void free_endpoint(Endpoint endpoint); +static Endpoint *alloc_endpoint(const char *cursorName, dsm_handle dsmHandle); +static void free_endpoint(Endpoint *endpoint); static void create_and_connect_mq(TupleDesc tupleDesc, dsm_segment **mqSeg /* out */ , shm_mq_handle **mqHandle /* out */ ); static void detach_mq(dsm_segment *dsmSeg); -static void setup_session_info_entry(void); -static void wait_receiver(EndpointExecState * state); -static void unset_endpoint_sender_pid(Endpoint endPoint); -static void 
abort_endpoint(EndpointExecState * state); +static void setup_endpoint_token_entry(void); +static void wait_receiver(void); +static void unset_endpoint_sender_pid(Endpoint *endPoint); +static void abort_endpoint(void); static void wait_parallel_retrieve_close(void); -/* utility */ -static void generate_endpoint_name(char *name, const char *cursorName); /* * Calculate the shared memory size for PARALLEL RETRIEVE CURSOR execute. @@ -165,7 +163,7 @@ EndpointShmemSize(void) * the maximum endpoint number, so use MAX_ENDPOINT_SIZE here. */ size = add_size( - size, hash_estimate_size(MAX_ENDPOINT_SIZE, sizeof(SessionInfoEntry))); + size, hash_estimate_size(MAX_ENDPOINT_SIZE, sizeof(EndpointTokenEntry))); return size; } @@ -178,7 +176,7 @@ EndpointShmemInit(void) bool found; HASHCTL hctl; - sharedEndpoints = (Endpoint) + sharedEndpoints = (Endpoint *) ShmemInitStruct(SHMEM_ENDPOINTS_ENTRIES, MAXALIGN(mul_size(MAX_ENDPOINT_SIZE, sizeof(struct EndpointData))), &found); @@ -186,10 +184,10 @@ EndpointShmemInit(void) InitSharedEndpoints(); MemSet(&hctl, 0, sizeof(hctl)); - hctl.keysize = sizeof(SessionTokenTag); - hctl.entrysize = sizeof(SessionInfoEntry); + hctl.keysize = sizeof(EndpointTokenTag); + hctl.entrysize = sizeof(EndpointTokenEntry); hctl.hash = tag_hash; - sharedSessionInfoHash = + EndpointTokenHash = ShmemInitHash(SHMEM_ENPOINTS_SESSION_INFO, MAX_ENDPOINT_SIZE, MAX_ENDPOINT_SIZE, &hctl, HASH_ELEM | HASH_FUNCTION); } @@ -200,7 +198,7 @@ EndpointShmemInit(void) static void InitSharedEndpoints() { - Endpoint endpoints = sharedEndpoints; + Endpoint *endpoints = sharedEndpoints; for (int i = 0; i < MAX_ENDPOINT_SIZE; ++i) { @@ -259,17 +257,18 @@ WaitEndpointsReady(EState *estate) * Get or create a authentication token for current session. 
*/ static const int8 * -get_or_create_token(void) +create_endpoint_token(void) { static int sessionId = InvalidEndpointSessionId; - static int8 currentToken[ENDPOINT_TOKEN_HEX_LEN] = {0}; + static int8 currentToken[ENDPOINT_TOKEN_ARR_LEN] = {0}; + /* Generate a new token only if gp_session_id has changed */ if (sessionId != gp_session_id) { sessionId = gp_session_id; - if (!pg_strong_random(currentToken, ENDPOINT_TOKEN_HEX_LEN)) + if (!pg_strong_random(currentToken, ENDPOINT_TOKEN_ARR_LEN)) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("failed to generate a new random token"))); + errmsg("failed to generate a new random token for session id %d", sessionId))); } return currentToken; } @@ -277,16 +276,11 @@ get_or_create_token(void) /* * Send acknowledge message to QD. */ -static void +void EndpointNotifyQD(const char *message) { - StringInfoData buf; + NotifyMyFrontEnd(CDB_NOTIFY_ENDPOINT_ACK, message, MyProcPid); - pq_beginmessage(&buf, 'A'); - pq_sendint(&buf, MyProcPid, sizeof(int32)); - pq_sendstring(&buf, CDB_NOTIFY_ENDPOINT_ACK); - pq_sendstring(&buf, message); - pq_endmessage(&buf); pq_flush(); } @@ -297,31 +291,28 @@ EndpointNotifyQD(const char *message) */ void SetupEndpointExecState(TupleDesc tupleDesc, const char *cursorName, - EndpointExecState * state) + CmdType operation, DestReceiver **endpointDest) { shm_mq_handle *shmMqHandle; - DestReceiver *endpointDest; + + allocEndpointExecState(); /* * The message queue needs to be created first since the dsm_handle has to * be ready when create EndpointDesc entry. */ - create_and_connect_mq(tupleDesc, &state->dsmSeg, &shmMqHandle); + create_and_connect_mq(tupleDesc, &(CurrentEndpointExecState->dsmSeg), &shmMqHandle); /* * Alloc endpoint and set it as the active one for sender. 
*/ - state->endpoint = - alloc_endpoint(cursorName, dsm_segment_handle(state->dsmSeg)); - setup_session_info_entry(); + CurrentEndpointExecState->endpoint = + alloc_endpoint(cursorName, dsm_segment_handle(CurrentEndpointExecState->dsmSeg)); + setup_endpoint_token_entry(); - /* - * Once the endpoint has been created in shared memory, send acknowledge - * message to QD so DECLARE PARALLEL RETRIEVE CURSOR statement can finish. - */ - EndpointNotifyQD(ENDPOINT_READY_ACK_MSG); - endpointDest = CreateTupleQueueDestReceiver(shmMqHandle); - state->dest = endpointDest; + CurrentEndpointExecState->dest = CreateTupleQueueDestReceiver(shmMqHandle); + (CurrentEndpointExecState->dest->rStartup)(CurrentEndpointExecState->dest, operation, tupleDesc); + *endpointDest = CurrentEndpointExecState->dest; } /* @@ -335,19 +326,19 @@ SetupEndpointExecState(TupleDesc tupleDesc, const char *cursorName, * Should also clean all other endpoint info here. */ void -DestroyEndpointExecState(EndpointExecState * state) +DestroyEndpointExecState() { - DestReceiver *endpointDest = state->dest; + DestReceiver *endpointDest = CurrentEndpointExecState->dest; - Assert(state->endpoint); - Assert(state->dsmSeg); + Assert(CurrentEndpointExecState->endpoint); + Assert(CurrentEndpointExecState->dsmSeg); /* * wait for receiver to start tuple retrieving. ackDone latch will be * reset to be re-used when retrieving finished. See notify_sender() * callers. */ - wait_receiver(state); + wait_receiver(); /* * tqueueShutdownReceiver() (rShutdown callback) will call @@ -356,17 +347,17 @@ DestroyEndpointExecState(EndpointExecState * state) */ (*endpointDest->rShutdown) (endpointDest); (*endpointDest->rDestroy) (endpointDest); - state->dest = NULL; + CurrentEndpointExecState->dest = NULL; /* * Wait until all data is retrieved by receiver. This is needed because * when the endpoint sends all data to shared message queue. The retrieve * session may still not get all data. 
*/ - wait_receiver(state); + wait_receiver(); LWLockAcquire(ParallelCursorEndpointLock, LW_EXCLUSIVE); - unset_endpoint_sender_pid(state->endpoint); + unset_endpoint_sender_pid(CurrentEndpointExecState->endpoint); LWLockRelease(ParallelCursorEndpointLock); /* Notify QD */ EndpointNotifyQD(ENDPOINT_FINISHED_ACK_MSG); @@ -375,20 +366,20 @@ DestroyEndpointExecState(EndpointExecState * state) * If all data get sent, hang the process and wait for QD to close it. The * purpose is to not clean up Endpoint entry until CLOSE/COMMIT/ABORT * (i.e. PortalCleanup get executed). So user can still see the finished - * endpoint status through the gp_endpoints() UDF. This is needed because + * endpoint status through the gp_get_endpoints() UDF. This is needed because * pg_cursor view can still see the PARALLEL RETRIEVE CURSOR */ wait_parallel_retrieve_close(); LWLockAcquire(ParallelCursorEndpointLock, LW_EXCLUSIVE); - free_endpoint(state->endpoint); + free_endpoint(CurrentEndpointExecState->endpoint); LWLockRelease(ParallelCursorEndpointLock); - state->endpoint = NULL; + CurrentEndpointExecState->endpoint = NULL; - detach_mq(state->dsmSeg); - state->dsmSeg = NULL; + detach_mq(CurrentEndpointExecState->dsmSeg); + CurrentEndpointExecState->dsmSeg = NULL; - CurrEndpointExecState = NULL; + CurrentEndpointExecState = NULL; } /* @@ -398,11 +389,11 @@ DestroyEndpointExecState(EndpointExecState * state) * dsmHandle - dsm handle of shared memory message queue. 
*/ static Endpoint -alloc_endpoint(const char *cursorName, dsm_handle dsmHandle) +*alloc_endpoint(const char *cursorName, dsm_handle dsmHandle) { int i; int foundIdx = -1; - Endpoint ret = NULL; + Endpoint *ret = NULL; dsm_handle session_dsm_handle; session_dsm_handle = GetSessionDsmHandle(); @@ -463,7 +454,7 @@ alloc_endpoint(const char *cursorName, dsm_handle dsmHandle) if (foundIdx == -1) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("failed to allocate endpoint"))); + errmsg("failed to allocate endpoint for session id %d", gp_session_id))); generate_endpoint_name(sharedEndpoints[i].name, cursorName); strlcpy(sharedEndpoints[i].cursorName, cursorName, NAMEDATALEN); @@ -524,6 +515,7 @@ create_and_connect_mq(TupleDesc tupleDesc, dsm_segment **mqSeg /* out */ , /* Create dsm and initialize toc. */ *mqSeg = dsm_create(tocSize, 0); + /* Make sure the dsm sticks around up until session exit */ dsm_pin_mapping(*mqSeg); toc = shm_toc_create(ENDPOINT_MSG_QUEUE_MAGIC, dsm_segment_address(*mqSeg), @@ -542,28 +534,29 @@ create_and_connect_mq(TupleDesc tupleDesc, dsm_segment **mqSeg /* out */ , shm_toc_insert(toc, ENDPOINT_KEY_TUPLE_QUEUE, mq); shm_mq_set_sender(mq, MyProc); *mqHandle = shm_mq_attach(mq, *mqSeg, NULL); + if (*mqHandle == NULL) + ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("attach to endpoint shared message queue failed"))); } /* - * Create/reuse SessionInfoEntry for current session in shared memory. - * SessionInfoEntry is used for authentication in the retrieve sessions. + * Create/reuse EndpointTokenEntry for current session in shared memory. + * EndpointTokenEntry is used for authentication in the retrieve sessions. 
*/ static void -setup_session_info_entry() +setup_endpoint_token_entry() { - SessionInfoEntry *infoEntry = NULL; + EndpointTokenEntry *infoEntry = NULL; bool found = false; - SessionTokenTag tag; + EndpointTokenTag tag; const int8 *token = NULL; tag.sessionID = gp_session_id; tag.userID = GetSessionUserId(); LWLockAcquire(ParallelCursorEndpointLock, LW_EXCLUSIVE); - infoEntry = (SessionInfoEntry *) hash_search(sharedSessionInfoHash, &tag, - HASH_ENTER, &found); - elog(DEBUG3, "CDB_ENDPOINT: Finish endpoint init. Found SessionInfoEntry? %d", - found); + infoEntry = (EndpointTokenEntry *) hash_search(EndpointTokenHash, &tag, HASH_ENTER, &found); + elog(DEBUG3, "CDB_ENDPOINT: Finish endpoint init. Found EndpointTokenEntry? %d", found); /* * Save the token if it is the first time we create endpoint in current @@ -571,8 +564,8 @@ setup_session_info_entry() */ if (!found) { - token = get_or_create_token(); - memcpy(infoEntry->token, token, ENDPOINT_TOKEN_HEX_LEN); + token = create_endpoint_token(); + memcpy(infoEntry->token, token, ENDPOINT_TOKEN_ARR_LEN); infoEntry->refCount = 0; } @@ -629,8 +622,10 @@ checkQDConnectionAlive() * from the queue, the queue will be not available for receiver. 
*/ static void -wait_receiver(EndpointExecState * state) +wait_receiver(void) { + EndpointExecState * state = CurrentEndpointExecState; + elog(DEBUG3, "CDB_ENDPOINTS: wait receiver"); while (true) { @@ -645,7 +640,7 @@ wait_receiver(EndpointExecState * state) wr = WaitLatchOrSocket(&state->endpoint->ackDone, WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_TIMEOUT | WL_SOCKET_READABLE, MyProcPort->sock, - WAIT_ENDPOINT_TIMEOUT, + WAIT_ENDPOINT_TIMEOUT_MS, PG_WAIT_PARALLEL_RETRIEVE_CURSOR); if (wr & WL_SOCKET_READABLE) @@ -654,14 +649,14 @@ wait_receiver(EndpointExecState * state) { ereport(LOG, (errmsg("CDB_ENDPOINT: sender found that the connection to QD is broken"))); - abort_endpoint(state); + abort_endpoint(); proc_exit(0); } } if (wr & WL_POSTMASTER_DEATH) { - abort_endpoint(state); + abort_endpoint(); ereport(LOG, (errmsg("CDB_ENDPOINT: postmaster exit, close shared memory message queue"))); proc_exit(0); @@ -699,15 +694,10 @@ detach_mq(dsm_segment *dsmSeg) * Needs to be called with exclusive lock on ParallelCursorEndpointLock. 
*/ static void -unset_endpoint_sender_pid(Endpoint endpoint) +unset_endpoint_sender_pid(Endpoint *endpoint) { - SessionTokenTag tag; - - tag.sessionID = gp_session_id; - tag.userID = GetSessionUserId(); - - if (endpoint == NULL || endpoint->empty) - return; + Assert(endpoint); + Assert(!endpoint->empty); elog(DEBUG3, "CDB_ENDPOINT: unset endpoint sender pid"); @@ -717,18 +707,19 @@ unset_endpoint_sender_pid(Endpoint endpoint) */ Assert(MyProcPid == endpoint->senderPid || endpoint->senderPid == InvalidPid); - if (MyProcPid == endpoint->senderPid) - { - endpoint->senderPid = InvalidPid; - } + Assert(!am_cursor_retrieve_handler); + + endpoint->senderPid = InvalidPid; } /* * abort_endpoint - xact abort routine for endpoint */ static void -abort_endpoint(EndpointExecState * state) +abort_endpoint(void) { + EndpointExecState * state = CurrentEndpointExecState; + if (state->dest) { /* @@ -798,7 +789,7 @@ wait_parallel_retrieve_close(void) wr = WaitLatchOrSocket(&MyProc->procLatch, WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_TIMEOUT | WL_SOCKET_READABLE, MyProcPort->sock, - WAIT_ENDPOINT_TIMEOUT, + WAIT_ENDPOINT_TIMEOUT_MS, PG_WAIT_PARALLEL_RETRIEVE_CURSOR); if (wr & WL_POSTMASTER_DEATH) @@ -829,10 +820,11 @@ wait_parallel_retrieve_close(void) * Needs to be called with exclusive lock on ParallelCursorEndpointLock. 
*/ static void -free_endpoint(Endpoint endpoint) +free_endpoint(Endpoint *endpoint) { - SessionTokenTag tag; - SessionInfoEntry *infoEntry = NULL; + EndpointTokenTag tag; + EndpointTokenEntry *infoEntry = NULL; + bool found; Assert(endpoint); Assert(!endpoint->empty); @@ -850,20 +842,20 @@ free_endpoint(Endpoint endpoint) tag.sessionID = endpoint->sessionID; tag.userID = endpoint->userID; - infoEntry = (SessionInfoEntry *) hash_search( - sharedSessionInfoHash, &tag, HASH_FIND, NULL); - if (infoEntry) - { - infoEntry->refCount--; - if (infoEntry->refCount == 0) - hash_search(sharedSessionInfoHash, &tag, HASH_REMOVE, NULL); - } + infoEntry = (EndpointTokenEntry *) hash_search( + EndpointTokenHash, &tag, HASH_FIND, &found); + Assert(found); + + infoEntry->refCount--; + if (infoEntry->refCount == 0) + hash_search(EndpointTokenHash, &tag, HASH_REMOVE, NULL); + endpoint->sessionID = InvalidEndpointSessionId; endpoint->userID = InvalidOid; } Endpoint -get_endpointdesc_by_index(int index) +*get_endpointdesc_by_index(int index) { Assert(index > -1 && index < MAX_ENDPOINT_SIZE); return &sharedEndpoints[index]; @@ -881,11 +873,13 @@ get_endpointdesc_by_index(int index) * The caller is responsible for acquiring ParallelCursorEndpointLock lock. 
*/ Endpoint -find_endpoint(const char *endpointName, int sessionID) +*find_endpoint(const char *endpointName, int sessionID) { - Endpoint res = NULL; + Endpoint *res = NULL; - Assert(endpointName); + Assert(endpointName && strlen(endpointName) > 0); + Assert(LWLockHeldByMe(ParallelCursorEndpointLock)); + Assert(sessionID != InvalidEndpointSessionId); for (int i = 0; i < MAX_ENDPOINT_SIZE; ++i) { @@ -908,21 +902,21 @@ find_endpoint(const char *endpointName, int sessionID) void get_token_from_session_hashtable(int sessionId, Oid userID, int8 *token /* out */ ) { - SessionInfoEntry *infoEntry = NULL; - SessionTokenTag tag; + EndpointTokenEntry *infoEntry = NULL; + EndpointTokenTag tag; tag.sessionID = sessionId; tag.userID = userID; LWLockAcquire(ParallelCursorEndpointLock, LW_SHARED); - infoEntry = (SessionInfoEntry *) hash_search(sharedSessionInfoHash, &tag, + infoEntry = (EndpointTokenEntry *) hash_search(EndpointTokenHash, &tag, HASH_FIND, NULL); if (infoEntry == NULL) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("token for user id: %u, session: %d doesn't exist", tag.userID, sessionId))); - memcpy(token, infoEntry->token, ENDPOINT_TOKEN_HEX_LEN); + memcpy(token, infoEntry->token, ENDPOINT_TOKEN_ARR_LEN); LWLockRelease(ParallelCursorEndpointLock); } @@ -934,12 +928,12 @@ int get_session_id_from_token(Oid userID, const int8 *token) { int sessionId = InvalidEndpointSessionId; - SessionInfoEntry *infoEntry = NULL; + EndpointTokenEntry *infoEntry = NULL; HASH_SEQ_STATUS status; LWLockAcquire(ParallelCursorEndpointLock, LW_SHARED); - hash_seq_init(&status, sharedSessionInfoHash); - while ((infoEntry = (SessionInfoEntry *) hash_seq_search(&status)) != NULL) + hash_seq_init(&status, EndpointTokenHash); + while ((infoEntry = (EndpointTokenEntry *) hash_seq_search(&status)) != NULL) { if (endpoint_token_hex_equals(infoEntry->token, token) && userID == infoEntry->tag.userID) @@ -954,75 +948,37 @@ get_session_id_from_token(Oid userID, const int8 *token) return 
sessionId; } -/* - * Generate the endpoint name. - */ -static void -generate_endpoint_name(char *name, const char *cursorName) -{ - int len, - cursorLen; - - len = 0; - - /* part1: cursor name */ - cursorLen = strlen(cursorName); - if (cursorLen > ENDPOINT_NAME_CURSOR_LEN) - cursorLen = ENDPOINT_NAME_CURSOR_LEN; - memcpy(name, cursorName, cursorLen); - len += cursorLen; - - /* part2: gp_session_id */ - snprintf(name + len, ENDPOINT_NAME_SESSIONID_LEN + 1, "%08x", gp_session_id); - len += ENDPOINT_NAME_SESSIONID_LEN; - - /* - * part3: gp_command_count In theory cursor name + gp_session_id is - * enough, but we'd keep this part to avoid confusion or potential issues - * for the scenario that in the same session (thus same gp_session_id), - * two endpoints with same cursor names (happens the cursor is - * dropped/rollbacked and then recreated) and retrieve the endpoints would - * be confusing for users that in the same retrieve connection. - */ - snprintf(name + len, ENDPOINT_NAME_COMMANDID_LEN + 1, "%08x", gp_command_count); - len += ENDPOINT_NAME_COMMANDID_LEN; - - name[len] = '\0'; -} - /* * Called during xaction abort. */ void AtAbort_EndpointExecState() { - EndpointExecState *state = CurrEndpointExecState; + EndpointExecState *state = CurrentEndpointExecState; if (state != NULL) { - abort_endpoint(state); + abort_endpoint(); pfree(state); - CurrEndpointExecState = NULL; + CurrentEndpointExecState = NULL; } } -EndpointExecState * +/* allocate new EndpointExecState and set it to CurrentEndpointExecState */ +void allocEndpointExecState() { EndpointExecState *endpointExecState; MemoryContext oldcontext; - if (unlikely(CurrEndpointExecState != NULL)) - ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("previous endpoint estate is not cleaned up"))); + /* Previous endpoint estate should be cleaned up. 
*/ + Assert(!CurrentEndpointExecState); oldcontext = MemoryContextSwitchTo(TopMemoryContext); endpointExecState = palloc0(sizeof(EndpointExecState)); - CurrEndpointExecState = endpointExecState; + CurrentEndpointExecState = endpointExecState; MemoryContextSwitchTo(oldcontext); - - return endpointExecState; } diff --git a/src/backend/cdb/endpoint/cdbendpoint_private.h b/src/backend/cdb/endpoint/cdbendpoint_private.h index 53e835744ec..79ed74bb4d8 100644 --- a/src/backend/cdb/endpoint/cdbendpoint_private.h +++ b/src/backend/cdb/endpoint/cdbendpoint_private.h @@ -17,8 +17,8 @@ #define CDBENDPOINTINTERNAL_H #define MAX_ENDPOINT_SIZE 1024 -#define ENDPOINT_TOKEN_HEX_LEN 16 -#define ENDPOINT_TOKEN_STR_LEN (ENDPOINT_TOKEN_HEX_LEN<<1) +#define ENDPOINT_TOKEN_ARR_LEN 16 +#define ENDPOINT_TOKEN_STR_LEN (ENDPOINT_TOKEN_ARR_LEN<<1) #define InvalidEndpointSessionId (-1) /* follows invalid * gp_session_id */ @@ -34,8 +34,6 @@ */ /* ACK NOTICE MESSAGE FROM ENDPOINT QE/Entry DB to QD */ -#define ENDPOINT_READY_ACK_MSG "ENDPOINT_READY" -#define ENDPOINT_FINISHED_ACK_MSG "ENDPOINT_FINISHED" #define ENDPOINT_NAME_SESSIONID_LEN 8 #define ENDPOINT_NAME_COMMANDID_LEN 8 #define ENDPOINT_NAME_CURSOR_LEN (NAMEDATALEN - 1 - ENDPOINT_NAME_SESSIONID_LEN - ENDPOINT_NAME_COMMANDID_LEN) @@ -43,16 +41,16 @@ extern void check_parallel_retrieve_cursor_errors(EState *estate); /* Endpoint shared memory utility functions in "cdbendpoint.c" */ -extern Endpoint get_endpointdesc_by_index(int index); -extern Endpoint find_endpoint(const char *endpointName, int sessionID); +extern Endpoint *get_endpointdesc_by_index(int index); +extern Endpoint *find_endpoint(const char *endpointName, int sessionID); extern void get_token_from_session_hashtable(int sessionId, Oid userID, int8 *token /* out */ ); extern int get_session_id_from_token(Oid userID, const int8 *token); /* utility functions in "cdbendpointutilities.c" */ extern bool endpoint_token_hex_equals(const int8 *token1, const int8 *token2); extern bool 
endpoint_name_equals(const char *name1, const char *name2); -extern void endpoint_token_str2hex(int8 *token, const char *tokenStr); -extern void endpoint_token_hex2str(const int8 *token, char *tokenStr); +extern void endpoint_token_str2arr(const char *tokenStr, int8 *token); +extern void endpoint_token_arr2str(const int8 *token, char *tokenStr); extern char *state_enum_to_string(EndpointState state); #endif /* CDBENDPOINTINTERNAL_H */ diff --git a/src/backend/cdb/endpoint/cdbendpointretrieve.c b/src/backend/cdb/endpoint/cdbendpointretrieve.c index f1d8ac3c6a0..a51aa678af6 100644 --- a/src/backend/cdb/endpoint/cdbendpointretrieve.c +++ b/src/backend/cdb/endpoint/cdbendpointretrieve.c @@ -73,7 +73,7 @@ typedef struct RetrieveExecEntry /* The name of endpoint to be retrieved, also behave as hash key */ char endpointName[NAMEDATALEN]; /* The endpoint to be retrieved */ - Endpoint endpoint; + Endpoint *endpoint; /* The dsm handle which contains shared memory message queue */ dsm_segment *mqSeg; /* Shared memory message queue */ @@ -95,7 +95,7 @@ typedef struct RetrieveControl * Track current retrieve entry in executor. Multiple entries are allowed * to be in one retrieve session but only one entry is active. 
*/ - RetrieveExecEntry *entry; + RetrieveExecEntry *current_entry; /* * Hash table to cache tuple descriptors for all endpoint_names which have @@ -116,21 +116,21 @@ static RetrieveControl RetrieveCtl = }; static void init_retrieve_exec_entry(RetrieveExecEntry * entry); -static Endpoint get_endpoint_from_retrieve_exec_entry(RetrieveExecEntry * entry, bool noError); -static RetrieveExecEntry * start_retrieve(const char *endpointName); -static void validate_retrieve_endpoint(Endpoint endpointDesc, const char *endpointName); -static void finish_retrieve(RetrieveExecEntry * entry, bool resetPID); -static void attach_receiver_mq(RetrieveExecEntry * entry, dsm_handle dsmHandle); -static void detach_receiver_mq(RetrieveExecEntry * entry); -static void notify_sender(RetrieveExecEntry * entry, bool finished); -static void retrieve_cancel_action(RetrieveExecEntry * entry, char *msg); +static Endpoint *get_endpoint_from_retrieve_exec_entry(RetrieveExecEntry *entry, bool noError); +static void start_retrieve(const char *endpointName); +static void validate_retrieve_endpoint(Endpoint *endpointDesc, const char *endpointName); +static void finish_retrieve(bool resetPID); +static void attach_receiver_mq(dsm_handle dsmHandle); +static void detach_receiver_mq(RetrieveExecEntry *entry); +static void notify_sender(bool finished); +static void retrieve_cancel_action(RetrieveExecEntry *entry, char *msg); static void retrieve_exit_callback(int code, Datum arg); static void retrieve_xact_callback(XactEvent ev, void *arg); static void retrieve_subxact_callback(SubXactEvent event, SubTransactionId mySubid, SubTransactionId parentSubid, void *arg); -static TupleTableSlot *receive_tuple_slot(RetrieveExecEntry * entry); +static TupleTableSlot *retrieve_next_tuple(void); /* * AuthEndpoint - Authenticate for retrieve connection. 
@@ -141,9 +141,9 @@ bool AuthEndpoint(Oid userID, const char *tokenStr) { bool found = false; - int8 token[ENDPOINT_TOKEN_HEX_LEN] = {0}; + int8 token[ENDPOINT_TOKEN_ARR_LEN] = {0}; - endpoint_token_str2hex(token, tokenStr); + endpoint_token_str2arr(tokenStr, token); RetrieveCtl.sessionID = get_session_id_from_token(userID, token); if (RetrieveCtl.sessionID != InvalidEndpointSessionId) @@ -166,9 +166,9 @@ AuthEndpoint(Oid userID, const char *tokenStr) TupleDesc GetRetrieveStmtTupleDesc(const RetrieveStmt * stmt) { - RetrieveCtl.entry = start_retrieve(stmt->endpoint_name); + start_retrieve(stmt->endpoint_name); - return RetrieveCtl.entry->retrieveTs->tts_tupleDescriptor; + return RetrieveCtl.current_entry->retrieveTs->tts_tupleDescriptor; } /* @@ -176,16 +176,16 @@ GetRetrieveStmtTupleDesc(const RetrieveStmt * stmt) * * This function tries to use the endpoint name in the RetrieveStmt to find the * attached endpoint in this retrieve session. If the endpoint can be found, - * then read from the message queue to feed the given DestReceiver. And mark - * the endpoint as detached before returning. + * then read from the message queue to feed the active portal's tuplestore. And + * mark the endpoint as detached before returning. 
*/ void -ExecRetrieveStmt(const RetrieveStmt * stmt, DestReceiver *dest) +ExecRetrieveStmt(const RetrieveStmt *stmt, DestReceiver *dest) { TupleTableSlot *result = NULL; int64 retrieveCount = 0; - if (RetrieveCtl.entry == NULL) + if (RetrieveCtl.current_entry == NULL) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("endpoint %s is not attached", stmt->endpoint_name))); @@ -197,11 +197,14 @@ ExecRetrieveStmt(const RetrieveStmt * stmt, DestReceiver *dest) "count should not be: %ld", retrieveCount))); - if (RetrieveCtl.entry->retrieveState < RETRIEVE_STATE_FINISHED) + Assert(dest->mydest == DestTuplestore); + Assert(RetrieveCtl.current_entry->retrieveState > RETRIEVE_STATE_INIT); + + if (RetrieveCtl.current_entry->retrieveState < RETRIEVE_STATE_FINISHED) { while (stmt->is_all || retrieveCount > 0) { - result = receive_tuple_slot(RetrieveCtl.entry); + result = retrieve_next_tuple(); if (!result) break; @@ -210,8 +213,12 @@ ExecRetrieveStmt(const RetrieveStmt * stmt, DestReceiver *dest) retrieveCount--; } } + else + { + /* All tuples have already been retrieved. Nothing to do */ + } - finish_retrieve(RetrieveCtl.entry, false); + finish_retrieve(false); } /* @@ -236,7 +243,7 @@ init_retrieve_exec_entry(RetrieveExecEntry * entry) * if there is something wrong during validation, warn or error out, depending * on the parameter noError. 
*/ -static Endpoint +static Endpoint* get_endpoint_from_retrieve_exec_entry(RetrieveExecEntry * entry, bool noError) { Assert(LWLockHeldByMe(ParallelCursorEndpointLock)); @@ -268,6 +275,26 @@ get_endpoint_from_retrieve_exec_entry(RetrieveExecEntry * entry, bool noError) return entry->endpoint; } +/* + * Initialize a hashtable, its key is the endpoint's name, its value is + * RetrieveExecEntry +*/ +void InitRetrieveCtl(void) +{ + HASHCTL ctl; + + if (RetrieveCtl.RetrieveExecEntryHTB) + return; + + MemSet(&ctl, 0, sizeof(ctl)); + ctl.keysize = NAMEDATALEN; + ctl.entrysize = sizeof(RetrieveExecEntry); + ctl.hash = string_hash; + RetrieveCtl.RetrieveExecEntryHTB = hash_create("retrieve hash", MAX_ENDPOINT_SIZE, &ctl, + (HASH_ELEM | HASH_FUNCTION)); + RetrieveCtl.current_entry = NULL; +} + /* * start_retrieve - start to retrieve an endpoint. * @@ -280,35 +307,17 @@ get_endpoint_from_retrieve_exec_entry(RetrieveExecEntry * entry, bool noError) * When call RETRIEVE statement in PQprepare() & PQexecPrepared(), this func will * be called 2 times. 
*/ -static RetrieveExecEntry * +static void start_retrieve(const char *endpointName) { - HTAB *entryHTB; + HTAB *entryHTB = RetrieveCtl.RetrieveExecEntryHTB; RetrieveExecEntry *entry = NULL; bool found = false; - Endpoint endpoint; + Endpoint *endpoint; dsm_handle handle = DSM_HANDLE_INVALID; - /* - * Initialize a hashtable, its key is the endpoint's name, its value is - * RetrieveExecEntry - */ - entryHTB = RetrieveCtl.RetrieveExecEntryHTB; - if (entryHTB == NULL) - { - HASHCTL ctl; - MemSet(&ctl, 0, sizeof(ctl)); - ctl.keysize = NAMEDATALEN; - ctl.entrysize = sizeof(RetrieveExecEntry); - ctl.hash = string_hash; - RetrieveCtl.RetrieveExecEntryHTB = hash_create("retrieve hash", MAX_ENDPOINT_SIZE, &ctl, - (HASH_ELEM | HASH_FUNCTION)); - entryHTB = RetrieveCtl.RetrieveExecEntryHTB; - found = false; - } - else - entry = hash_search(entryHTB, endpointName, HASH_FIND, &found); + entry = hash_search(entryHTB, endpointName, HASH_FIND, &found); LWLockAcquire(ParallelCursorEndpointLock, LW_EXCLUSIVE); @@ -319,8 +328,8 @@ start_retrieve(const char *endpointName) endpoint = find_endpoint(endpointName, RetrieveCtl.sessionID); if (!endpoint) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("the endpoint %s does not exist in the session", - endpointName))); + errmsg("the endpoint %s does not exist for session id %d", + endpointName, RetrieveCtl.sessionID))); validate_retrieve_endpoint(endpoint, endpointName); endpoint->receiverPid = MyProcPid; handle = endpoint->mqDsmHandle; @@ -340,13 +349,14 @@ start_retrieve(const char *endpointName) LWLockRelease(ParallelCursorEndpointLock); entry->endpoint = endpoint; + + RetrieveCtl.current_entry = entry; + if (!found) - attach_receiver_mq(entry, handle); + attach_receiver_mq(handle); if (CurrentSession->segment == NULL) AttachSession(endpoint->sessionDsmHandle); - - return entry; } /* @@ -354,7 +364,7 @@ start_retrieve(const char *endpointName) * validate whether it meets the requirement. 
*/ static void -validate_retrieve_endpoint(Endpoint endpoint, const char *endpointName) +validate_retrieve_endpoint(Endpoint *endpoint, const char *endpointName) { Assert(endpoint->mqDsmHandle != DSM_HANDLE_INVALID); @@ -403,18 +413,19 @@ validate_retrieve_endpoint(Endpoint endpoint, const char *endpointName) * Attach to the endpoint's shared memory message queue. */ static void -attach_receiver_mq(RetrieveExecEntry * entry, dsm_handle dsmHandle) +attach_receiver_mq(dsm_handle dsmHandle) { TupleDesc td; TupleDescNode *tupdescnode; - dsm_segment *dsmSeg; MemoryContext oldcontext; shm_toc *toc; void *lookup_space; int td_len; + RetrieveExecEntry *entry = RetrieveCtl.current_entry; Assert(!entry->mqSeg); Assert(!entry->mqHandle); + Assert(entry->retrieveState == RETRIEVE_STATE_INIT); /* * Store the result slot all the retrieve mode QE life cycle, we only have @@ -424,14 +435,17 @@ attach_receiver_mq(RetrieveExecEntry * entry, dsm_handle dsmHandle) elog(DEBUG3, "CDB_ENDPOINTS: init message queue conn for receiver"); - dsmSeg = dsm_attach(dsmHandle); - if (dsmSeg == NULL) - ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("attach to shared message queue failed"))); - entry->mqSeg = dsmSeg; + entry->mqSeg = dsm_attach(dsmHandle); + if (entry->mqSeg == NULL) + ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("attach to endpoint shared message queue failed"))); - dsm_pin_mapping(dsmSeg); - toc = shm_toc_attach(ENDPOINT_MSG_QUEUE_MAGIC, dsm_segment_address(dsmSeg)); + dsm_pin_mapping(entry->mqSeg); + toc = shm_toc_attach(ENDPOINT_MSG_QUEUE_MAGIC, dsm_segment_address(entry->mqSeg)); + if (toc == NULL) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("invalid magic number in dynamic shared memory segment"))); /* * Find the shared mq for tuple receiving from 'toc' and set up the @@ -440,7 +454,7 @@ attach_receiver_mq(RetrieveExecEntry * entry, dsm_handle dsmHandle) shm_mq *mq = shm_toc_lookup(toc, 
ENDPOINT_KEY_TUPLE_QUEUE, false); shm_mq_set_receiver(mq, MyProc); - entry->mqHandle = shm_mq_attach(mq, dsmSeg, NULL); + entry->mqHandle = shm_mq_attach(mq, entry->mqSeg, NULL); /* * Find the tuple descritpr information from 'toc' and set the tuple @@ -482,12 +496,12 @@ detach_receiver_mq(RetrieveExecEntry * entry) * If current endpoint get freed, it means the endpoint aborted. */ static void -notify_sender(RetrieveExecEntry * entry, bool finished) +notify_sender(bool finished) { - Endpoint endpoint; + Endpoint *endpoint; LWLockAcquire(ParallelCursorEndpointLock, LW_SHARED); - endpoint = get_endpoint_from_retrieve_exec_entry(entry, false); + endpoint = get_endpoint_from_retrieve_exec_entry(RetrieveCtl.current_entry, false); if (finished) endpoint->state = ENDPOINTSTATE_FINISHED; SetLatch(&endpoint->ackDone); @@ -500,11 +514,12 @@ notify_sender(RetrieveExecEntry * entry, bool finished) * When reading all tuples, should tell sender that retrieve is done. */ static TupleTableSlot * -receive_tuple_slot(RetrieveExecEntry * entry) +retrieve_next_tuple() { TupleTableSlot *result = NULL; MinimalTuple tup = NULL; bool readerdone = false; + RetrieveExecEntry *entry = RetrieveCtl.current_entry; CHECK_FOR_INTERRUPTS(); @@ -524,8 +539,8 @@ receive_tuple_slot(RetrieveExecEntry * entry) * wait_receiver() */ elog(DEBUG3, "CDB_ENDPOINT: receiver notifies sender in " - "receive_tuple_slot() when retrieving data for the first time"); - notify_sender(entry, false); + "retrieve_next_tuple() when retrieving data for the first time"); + notify_sender(false); } SIMPLE_FAULT_INJECTOR("fetch_tuples_from_endpoint"); @@ -557,7 +572,7 @@ receive_tuple_slot(RetrieveExecEntry * entry) * the transient record tuples. */ entry->retrieveState = RETRIEVE_STATE_FINISHED; - notify_sender(entry, true); + notify_sender(true); return NULL; } @@ -575,9 +590,6 @@ receive_tuple_slot(RetrieveExecEntry * entry) /* * finish_retrieve - Finish a retrieve statement. 
 *
- * When finishing retrieve statement, if this process have not yet finished this
- * message queue reading, then don't reset its pid.
- *
 * If current retrieve statement retrieve all tuples from endpoint. Set
 * endpoint state to ENDPOINTSTATE_FINISHED. Otherwise, set endpoint's status
 * from ENDPOINTSTATE_RETRIEVING to ENDPOINTSTATE_ATTACHED.
@@ -586,9 +598,10 @@ receive_tuple_slot(RetrieveExecEntry * entry)
 * Errors in these function is not expected to be raised.
 */
static void
-finish_retrieve(RetrieveExecEntry * entry, bool resetPID)
+finish_retrieve(bool resetPID)
{
-	Endpoint	endpoint = NULL;
+	Endpoint   *endpoint = NULL;
+	RetrieveExecEntry *entry = RetrieveCtl.current_entry;
 
 	Assert(entry);
 
@@ -601,9 +614,15 @@ finish_retrieve(RetrieveExecEntry * entry, bool resetPID)
		 * the retrieve abort stage, sender cleaned the Endpoint entry. And
		 * another endpoint gets allocated just after the cleanup, which will
		 * occupy current endpoint entry.
+		 * Also remove the entry from RetrieveCtl.RetrieveExecEntryHTB, so that
+		 * the next start_retrieve cannot look up this stale entry in the hash
+		 * table and then fail in get_endpoint_from_retrieve_exec_entry.
		 */
		LWLockRelease(ParallelCursorEndpointLock);
-		RetrieveCtl.entry = NULL;
+		elog(DEBUG3, "the Endpoint entry %s has already been cleaned, "
+			 "remove it from the RetrieveCtl.RetrieveExecEntryHTB hash table.", entry->endpointName);
+		hash_search(RetrieveCtl.RetrieveExecEntryHTB, entry->endpointName, HASH_REMOVE, NULL);
+		RetrieveCtl.current_entry = NULL;
		return;
	}
 
@@ -634,7 +653,7 @@ finish_retrieve(RetrieveExecEntry * entry, bool resetPID)
	}
 
	LWLockRelease(ParallelCursorEndpointLock);
-	RetrieveCtl.entry = NULL;
+	RetrieveCtl.current_entry = NULL;
}
 
/*
@@ -644,7 +663,7 @@
 static void
 retrieve_cancel_action(RetrieveExecEntry * entry, char *msg)
 {
-	Endpoint	endpoint;
+	Endpoint   *endpoint;
 
 	Assert(entry);
 
@@ -716,8 +735,8 @@ retrieve_exit_callback(int code, Datum arg)
 		return;
 
 	/* If the current retrieve statement has not fnished in this run. */
-	if (RetrieveCtl.entry)
-		finish_retrieve(RetrieveCtl.entry, true);
+	if (RetrieveCtl.current_entry)
+		finish_retrieve(true);
 
 	/* Cancel all partially retrieved endpoints in this session.
*/ hash_seq_init(&status, entryHTB); @@ -751,12 +770,13 @@ retrieve_xact_callback(XactEvent ev, void *arg pg_attribute_unused()) { elog(DEBUG3, "CDB_ENDPOINT: retrieve xact abort callback"); if (RetrieveCtl.sessionID != InvalidEndpointSessionId && - RetrieveCtl.entry) + RetrieveCtl.current_entry) { - if (RetrieveCtl.entry->retrieveState != RETRIEVE_STATE_FINISHED) - retrieve_cancel_action(RetrieveCtl.entry, + if (RetrieveCtl.current_entry->retrieveState != RETRIEVE_STATE_FINISHED) + retrieve_cancel_action(RetrieveCtl.current_entry, "Endpoint retrieve statement aborted"); - finish_retrieve(RetrieveCtl.entry, true); + finish_retrieve(true); + } } diff --git a/src/backend/cdb/endpoint/cdbendpointutils.c b/src/backend/cdb/endpoint/cdbendpointutils.c index db815115b17..8731836a466 100644 --- a/src/backend/cdb/endpoint/cdbendpointutils.c +++ b/src/backend/cdb/endpoint/cdbendpointutils.c @@ -25,16 +25,16 @@ #include "cdb/cdbutil.h" #include "cdb/cdbvars.h" -/* The two struct are used by gp_endpoints(). */ +/* These two structures are containers for the columns returned by the UDFs. */ typedef struct { char name[NAMEDATALEN]; char cursorName[NAMEDATALEN]; - int8 token[ENDPOINT_TOKEN_HEX_LEN]; - int dbid; + int8 token[ENDPOINT_TOKEN_ARR_LEN]; + int segmentIndex; EndpointState state; - Oid userId; + char userName[NAMEDATALEN]; int sessionId; } EndpointInfo; @@ -49,26 +49,27 @@ typedef struct static EndpointState state_string_to_enum(const char *state); /* - * Convert the string-format token to int (e.g. "123456789" to 0x123456789). + * Convert the string-format token to array + * (e.g. "123456789ABCDEF0" to [1,2,3,4,5,6,7,8,9,A,B,C,D,E,F,0]). 
*/ void -endpoint_token_str2hex(int8 *token, const char *tokenStr) +endpoint_token_str2arr(const char *tokenStr, int8 *token) { if (strlen(tokenStr) == ENDPOINT_TOKEN_STR_LEN) hex_decode(tokenStr, ENDPOINT_TOKEN_STR_LEN, (char *) token); else - ereport(FATAL, - (errcode(ERRCODE_INVALID_PASSWORD), + ereport(FATAL, (errcode(ERRCODE_INVALID_PASSWORD), errmsg("retrieve auth token is invalid"))); } /* - * Convert the hex-format token to string (e.g. 0x123456789 to "123456789"). + * Convert the array-format token to string + * (e.g. [1,2,3,4,5,6,7,8,9,A,B,C,D,E,F,0] to "123456789ABCDEF0"). */ void -endpoint_token_hex2str(const int8 *token, char *tokenStr) +endpoint_token_arr2str(const int8 *token, char *tokenStr) { - hex_encode((const char *) token, ENDPOINT_TOKEN_HEX_LEN, tokenStr); + hex_encode((const char *) token, ENDPOINT_TOKEN_ARR_LEN, tokenStr); tokenStr[ENDPOINT_TOKEN_STR_LEN] = 0; } @@ -82,7 +83,7 @@ endpoint_token_hex_equals(const int8 *token1, const int8 *token2) * memcmp should be good enough. Timing attack would not be a concern * here. */ - return memcmp(token1, token2, ENDPOINT_TOKEN_HEX_LEN) == 0; + return memcmp(token1, token2, ENDPOINT_TOKEN_ARR_LEN) == 0; } bool @@ -162,7 +163,7 @@ check_parallel_retrieve_cursor_errors(EState *estate) } /* - * On QD, display all the endpoints information in shared memory. + * On QD, display all the endpoints information is in shared memory. * * Note: * As a superuser, it can list all endpoints info of all users', but for @@ -170,11 +171,11 @@ check_parallel_retrieve_cursor_errors(EState *estate) * security reason. 
*/ Datum -gp_endpoints(PG_FUNCTION_ARGS) +gp_get_endpoints(PG_FUNCTION_ARGS) { if (Gp_role != GP_ROLE_DISPATCH) ereport(ERROR, (errcode(ERRCODE_GP_COMMAND_ERROR), - errmsg("gp_endpoints() can be called on query dispatcher only"))); + errmsg("gp_get_endpoints() could only be called on QD"))); FuncCallContext *funcctx; AllEndpointsInfo *all_info; @@ -197,13 +198,13 @@ gp_endpoints(PG_FUNCTION_ARGS) TupleDesc tupdesc = CreateTemplateTupleDesc(9); - TupleDescInitEntry(tupdesc, (AttrNumber) 1, "dbid", INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "gp_segment_id", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "auth_token", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "cursorname", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 4, "sessionid", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 5, "hostname", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 6, "port", INT4OID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 7, "userid", OIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 7, "username", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 8, "state", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 9, "endpointname", TEXTOID, -1, 0); @@ -216,27 +217,28 @@ gp_endpoints(PG_FUNCTION_ARGS) CdbPgResults cdb_pgresults = {NULL, 0}; - CdbDispatchCommand("SELECT endpointname,cursorname,auth_token,dbid," - "state,userid, sessionid" - " FROM pg_catalog.gp_segment_endpoints()", + CdbDispatchCommand("SELECT endpointname,cursorname,auth_token,gp_segment_id," + "state,username,sessionid" + " FROM pg_catalog.gp_get_segment_endpoints()", DF_WITH_SNAPSHOT | DF_CANCEL_ON_ERROR, &cdb_pgresults); if (cdb_pgresults.numResults == 0) { ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("gp_segment_endpoints didn't get back any data " - "from the segDBs"))); + errmsg("gp_get_segment_endpoints() failed to fetch data from segDBs"))); } res_number = 0; for (int i = 0; i < 
cdb_pgresults.numResults; i++) { - if (PQresultStatus(cdb_pgresults.pg_results[i]) != PGRES_TUPLES_OK) + ExecStatusType result_status = PQresultStatus(cdb_pgresults.pg_results[i]); + if (result_status != PGRES_TUPLES_OK) { cdbdisp_clearCdbPgResults(&cdb_pgresults); ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("gp_segment_endpoints(): resultStatus is not TUPLES_OK"))); + errmsg("gp_get_segment_endpoints(): resultStatus is %s", + PQresStatus(result_status)))); } res_number += PQntuples(cdb_pgresults.pg_results[i]); } @@ -255,10 +257,10 @@ gp_endpoints(PG_FUNCTION_ARGS) { strlcpy(all_info->infos[idx].name, PQgetvalue(result, j, 0), NAMEDATALEN); strlcpy(all_info->infos[idx].cursorName, PQgetvalue(result, j, 1), NAMEDATALEN); - endpoint_token_str2hex(all_info->infos[idx].token, PQgetvalue(result, j, 2)); - all_info->infos[idx].dbid = atoi(PQgetvalue(result, j, 3)); + endpoint_token_str2arr(PQgetvalue(result, j, 2), all_info->infos[idx].token); + all_info->infos[idx].segmentIndex = atoi(PQgetvalue(result, j, 3)); all_info->infos[idx].state = state_string_to_enum(PQgetvalue(result, j, 4)); - all_info->infos[idx].userId = (Oid) strtoul(PQgetvalue(result, j, 5), NULL, 10); + strlcpy(all_info->infos[idx].userName, PQgetvalue(result, j, 5), NAMEDATALEN); all_info->infos[idx].sessionId = atoi(PQgetvalue(result, j, 6)); idx++; } @@ -271,9 +273,9 @@ gp_endpoints(PG_FUNCTION_ARGS) for (int i = 0; i < MAX_ENDPOINT_SIZE; i++) { - const Endpoint entry = get_endpointdesc_by_index(i); + const Endpoint *entry = get_endpointdesc_by_index(i); - if (!entry->empty && (superuser() || entry->userID == GetUserId())) + if (!entry->empty && entry->databaseID == MyDatabaseId && (superuser() || entry->userID == GetUserId())) cnt++; } if (cnt != 0) @@ -295,7 +297,7 @@ gp_endpoints(PG_FUNCTION_ARGS) for (int i = 0; i < MAX_ENDPOINT_SIZE; i++) { - const Endpoint entry = get_endpointdesc_by_index(i); + const Endpoint *entry = get_endpointdesc_by_index(i); /* * Only allow current user to 
get own endpoints. Or let @@ -305,17 +307,14 @@ gp_endpoints(PG_FUNCTION_ARGS) { EndpointInfo *info = &all_info->infos[idx]; - info->dbid = - contentid_get_dbid(MASTER_CONTENT_ID, - GP_SEGMENT_CONFIGURATION_ROLE_PRIMARY, - false); + info->segmentIndex = MASTER_CONTENT_ID; get_token_from_session_hashtable(entry->sessionID, entry->userID, info->token); strlcpy(info->name, entry->name, NAMEDATALEN); strlcpy(info->cursorName, entry->cursorName, NAMEDATALEN); info->state = entry->state; info->sessionId = entry->sessionID; - info->userId = entry->userID; + strlcpy(info->userName, GetUserNameFromId(entry->userID, false), NAMEDATALEN); idx++; } } @@ -334,19 +333,20 @@ gp_endpoints(PG_FUNCTION_ARGS) Datum result; char tokenStr[ENDPOINT_TOKEN_STR_LEN + 1]; EndpointInfo *info = &all_info->infos[all_info->cur_idx++]; - GpSegConfigEntry *segCnfInfo = dbid_get_dbinfo(info->dbid); + int16 dbid = contentid_get_dbid(info->segmentIndex, GP_SEGMENT_CONFIGURATION_ROLE_PRIMARY, false); + GpSegConfigEntry *segCnfInfo = dbid_get_dbinfo(dbid); MemSet(values, 0, sizeof(values)); MemSet(nulls, 0, sizeof(nulls)); - values[0] = Int32GetDatum(info->dbid); - endpoint_token_hex2str(info->token, tokenStr); + values[0] = Int32GetDatum(info->segmentIndex); + endpoint_token_arr2str(info->token, tokenStr); values[1] = CStringGetTextDatum(tokenStr); values[2] = CStringGetTextDatum(info->cursorName); values[3] = Int32GetDatum(info->sessionId); values[4] = CStringGetTextDatum(segCnfInfo->hostname); values[5] = Int32GetDatum(segCnfInfo->port); - values[6] = ObjectIdGetDatum(info->userId); + values[6] = CStringGetTextDatum(info->userName); values[7] = CStringGetTextDatum(state_enum_to_string(info->state)); values[8] = CStringGetTextDatum(info->name); @@ -365,8 +365,12 @@ gp_endpoints(PG_FUNCTION_ARGS) * Or only show current user's endpoints on this segment. 
*/ Datum -gp_segment_endpoints(PG_FUNCTION_ARGS) +gp_get_segment_endpoints(PG_FUNCTION_ARGS) { + if (Gp_role != GP_ROLE_EXECUTE && Gp_role != GP_ROLE_UTILITY) + ereport(ERROR, (errcode(ERRCODE_GP_COMMAND_ERROR), + errmsg("gp_get_segment_endpoints() could only be called on QE"))); + FuncCallContext *funcctx; MemoryContext oldcontext; Datum values[10]; @@ -386,13 +390,13 @@ gp_segment_endpoints(PG_FUNCTION_ARGS) TupleDesc tupdesc = CreateTemplateTupleDesc(10); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "auth_token", TEXTOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 2, "databaseid", INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "databaseid", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "senderpid", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 4, "receiverpid", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 5, "state", TEXTOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 6, "dbid", INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 6, "gp_segment_id", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 7, "sessionid", INT4OID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 8, "userid", OIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 8, "username", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 9, "endpointname", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 10, "cursorname", TEXTOID, -1, 0); @@ -412,7 +416,7 @@ gp_segment_endpoints(PG_FUNCTION_ARGS) while (*endpoint_idx < MAX_ENDPOINT_SIZE) { Datum result; - const Endpoint entry = get_endpointdesc_by_index(*endpoint_idx); + const Endpoint *entry = get_endpointdesc_by_index(*endpoint_idx); MemSet(values, 0, sizeof(values)); MemSet(nulls, 0, sizeof(nulls)); @@ -421,23 +425,23 @@ gp_segment_endpoints(PG_FUNCTION_ARGS) * Only allow current user to list his/her own endpoints, or let * superuser list all endpoints. 
*/ - if (!entry->empty && (superuser() || entry->userID == GetUserId())) + if (!entry->empty && entry->databaseID == MyDatabaseId && (superuser() || entry->userID == GetUserId())) { char *state = NULL; - int8 token[ENDPOINT_TOKEN_HEX_LEN]; + int8 token[ENDPOINT_TOKEN_ARR_LEN]; char tokenStr[ENDPOINT_TOKEN_STR_LEN + 1]; get_token_from_session_hashtable(entry->sessionID, entry->userID, token); - endpoint_token_hex2str(token, tokenStr); + endpoint_token_arr2str(token, tokenStr); values[0] = CStringGetTextDatum(tokenStr); - values[1] = Int32GetDatum(entry->databaseID); + values[1] = ObjectIdGetDatum(entry->databaseID); values[2] = Int32GetDatum(entry->senderPid); values[3] = Int32GetDatum(entry->receiverPid); state = state_enum_to_string(entry->state); values[4] = CStringGetTextDatum(state); - values[5] = Int32GetDatum(GpIdentity.dbid); + values[5] = Int32GetDatum(GpIdentity.segindex); values[6] = Int32GetDatum(entry->sessionID); - values[7] = ObjectIdGetDatum(entry->userID); + values[7] = CStringGetTextDatum(GetUserNameFromId(entry->userID, false)); values[8] = CStringGetTextDatum(entry->name); values[9] = CStringGetTextDatum(entry->cursorName); @@ -509,6 +513,42 @@ state_string_to_enum(const char *state) else { ereport(ERROR, (errmsg("unknown endpoint state %s", state))); - return ENDPOINTSTATE_INVALID; + return ENDPOINTSTATE_INVALID; /* make the compiler happy */ } } + +/* + * Generate the endpoint name. 
+ */ +void +generate_endpoint_name(char *name, const char *cursorName) +{ + int len, + cursorLen; + + len = 0; + + /* part1: cursor name */ + cursorLen = strlen(cursorName); + if (cursorLen > ENDPOINT_NAME_CURSOR_LEN) + cursorLen = ENDPOINT_NAME_CURSOR_LEN; + memcpy(name, cursorName, cursorLen); + len += cursorLen; + + /* part2: gp_session_id */ + snprintf(name + len, ENDPOINT_NAME_SESSIONID_LEN + 1, "%08x", gp_session_id); + len += ENDPOINT_NAME_SESSIONID_LEN; + + /* + * part3: gp_command_count In theory cursor name + gp_session_id is + * enough, but we'd keep this part to avoid confusion or potential issues + * for the scenario that in the same session (thus same gp_session_id), + * two endpoints with same cursor names (happens the cursor is + * dropped/rollbacked and then recreated) and retrieve the endpoints would + * be confusing for users that in the same retrieve connection. + */ + snprintf(name + len, ENDPOINT_NAME_COMMANDID_LEN + 1, "%08x", gp_command_count); + len += ENDPOINT_NAME_COMMANDID_LEN; + + name[len] = '\0'; +} diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c index 6f1cd58b7fd..0062e841f72 100644 --- a/src/backend/commands/async.c +++ b/src/backend/commands/async.c @@ -2296,6 +2296,10 @@ ProcessIncomingNotify(bool flush) /* * Send NOTIFY message to my front end. + * + * GPDB: We have exposed this function globally for our dispatch-notify + * mechanism. We overload the srcPid field to pass in the gp_session_id + * from GPDB specific callsites. 
*/ void NotifyMyFrontEnd(const char *channel, const char *payload, int32 srcPid) diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index 87fe223d6c6..7033d6da4d5 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -33,6 +33,7 @@ #include "catalog/objectaccess.h" #include "catalog/pg_sequence.h" #include "catalog/pg_type.h" +#include "commands/async.h" #include "commands/defrem.h" #include "commands/sequence.h" #include "commands/tablecmds.h" @@ -2127,7 +2128,7 @@ seq_mask(char *page, BlockNumber blkno) /* * CDB: forward a nextval request from qExec to the QD */ -void +static void cdb_sequence_nextval_qe(Relation seqrel, int64 *plast, int64 *pcached, @@ -2156,11 +2157,7 @@ cdb_sequence_nextval_qe(Relation seqrel, */ char payload[128]; snprintf(payload, sizeof(payload), "%d:%d", dbid, seq_oid); - pq_beginmessage(&buf, 'A'); - pq_sendint(&buf, gp_session_id, sizeof(int32)); - pq_sendstring(&buf, "nextval"); /* channel */ - pq_sendstring(&buf, payload); - pq_endmessage(&buf); + NotifyMyFrontEnd("nextval", payload, gp_session_id); pq_flush(); /* diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index bdff0e8edd1..e0f3dbfc71e 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -114,6 +114,9 @@ #include "cdb/cdbutil.h" #include "cdb/cdbendpoint.h" +#define IS_PARALLEL_RETRIEVE_CURSOR(queryDesc) (queryDesc->ddesc && \ + queryDesc->ddesc->parallelCursorName && \ + strlen(queryDesc->ddesc->parallelCursorName) > 0) /* Hooks for plugins to get control in ExecutorStart/Run/Finish/End */ ExecutorStart_hook_type ExecutorStart_hook = NULL; @@ -773,7 +776,7 @@ standard_ExecutorRun(QueryDesc *queryDesc, DestReceiver *dest; bool sendTuples; MemoryContext oldcontext; - EndpointExecState *endpointExecState = NULL; + bool endpointCreated = false; uint64 es_processed = 0; /* * NOTE: Any local vars that are set in the PG_TRY block and examined in the @@ -901,12 
+904,7 @@ standard_ExecutorRun(QueryDesc *queryDesc, } else if (exec_identity == GP_ROOT_SLICE) { - bool isParallelRetrieveCursor = false; - DestReceiver *endpointDest = NULL; - - isParallelRetrieveCursor = (queryDesc->ddesc && - queryDesc->ddesc->parallelCursorName && - queryDesc->ddesc->parallelCursorName[0]); + DestReceiver *endpointDest; /* * When run a root slice, and it is a PARALLEL RETRIEVE CURSOR, it means @@ -917,32 +915,49 @@ standard_ExecutorRun(QueryDesc *queryDesc, * For the scenario: endpoint on QE, the query plan is changed, * the root slice also exists on QE. */ - if (isParallelRetrieveCursor) + if (IS_PARALLEL_RETRIEVE_CURSOR(queryDesc)) { - endpointExecState = allocEndpointExecState(); SetupEndpointExecState(queryDesc->tupDesc, queryDesc->ddesc->parallelCursorName, - endpointExecState); - endpointDest = endpointExecState->dest; - (endpointDest->rStartup)(endpointDest, operation, queryDesc->tupDesc); - } + operation, + &endpointDest); + endpointCreated = true; - /* - * Run a root slice - * It corresponds to the "normal" path through the executor - * in that we enter the plan at the top and count on the - * motion nodes at the fringe of the top slice to return - * without ever calling nodes below them. - */ - ExecutePlan(estate, - queryDesc->planstate, - amIParallel, - operation, - isParallelRetrieveCursor ? true : sendTuples, - count, - direction, - isParallelRetrieveCursor? endpointDest : dest, - execute_once); + /* + * Once the endpoint has been created in shared memory, send acknowledge + * message to QD so DECLARE PARALLEL RETRIEVE CURSOR statement can finish. 
+ */ + EndpointNotifyQD(ENDPOINT_READY_ACK_MSG); + + ExecutePlan(estate, + queryDesc->planstate, + amIParallel, + operation, + true, + count, + direction, + endpointDest, + execute_once); + } + else + { + /* + * Run a root slice + * It corresponds to the "normal" path through the executor + * in that we enter the plan at the top and count on the + * motion nodes at the fringe of the top slice to return + * without ever calling nodes below them. + */ + ExecutePlan(estate, + queryDesc->planstate, + amIParallel, + operation, + sendTuples, + count, + direction, + dest, + execute_once); + } } else { @@ -994,8 +1009,8 @@ standard_ExecutorRun(QueryDesc *queryDesc, /* * shutdown tuple receiver, if we started it */ - if (endpointExecState != NULL) - DestroyEndpointExecState(endpointExecState); + if (endpointCreated) + DestroyEndpointExecState(); if (sendTuples) dest->rShutdown(dest); diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 9cf83acc338..44daa2b1278 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -3639,9 +3639,12 @@ create_foreignscan_path(PlannerInfo *root, RelOptInfo *rel, pathnode->path.total_cost = total_cost; pathnode->path.pathkeys = pathkeys; - ForeignServer *server = NULL; - switch (rel->exec_location) + if (Gp_role == GP_ROLE_DISPATCH) { + ForeignServer *server = NULL; + + switch (rel->exec_location) + { case FTEXECLOCATION_ANY: CdbPathLocus_MakeGeneral(&(pathnode->path.locus)); break; @@ -3657,6 +3660,12 @@ create_foreignscan_path(PlannerInfo *root, RelOptInfo *rel, break; default: elog(ERROR, "unrecognized exec_location '%c'", rel->exec_location); + } + } + else + { + /* make entry locus for utility role */ + CdbPathLocus_MakeEntry(&(pathnode->path.locus)); } pathnode->fdw_outerpath = fdw_outerpath; diff --git a/src/backend/storage/ipc/shm_toc.c b/src/backend/storage/ipc/shm_toc.c index 863b98bf054..4b02c39e310 100644 --- a/src/backend/storage/ipc/shm_toc.c 
+++ b/src/backend/storage/ipc/shm_toc.c @@ -252,6 +252,7 @@ shm_toc_lookup(shm_toc *toc, uint64 key, bool noError) if (!noError) elog(ERROR, "could not find key " UINT64_FORMAT " in shm TOC at %p", key, toc); + return NULL; } diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index e8d415c1fb5..48f8f159097 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -5401,9 +5401,19 @@ PostgresMain(int argc, char *argv[], */ if (send_ready_for_query) { + char activity[50]; + memset(activity, 0, sizeof(activity)); + int remain = sizeof(activity); + + if (am_cursor_retrieve_handler) + { + strncpy(activity, "[retrieve] ", sizeof(activity)); + remain -= strlen(activity); + } if (IsAbortedTransactionBlockState()) { - set_ps_display("idle in transaction (aborted)"); + strncat(activity, "idle in transaction (aborted)", remain); + set_ps_display(activity); pgstat_report_activity(STATE_IDLEINTRANSACTION_ABORTED, NULL); /* Start the idle-in-transaction timer */ @@ -5416,7 +5426,8 @@ PostgresMain(int argc, char *argv[], } else if (IsTransactionOrTransactionBlock()) { - set_ps_display("idle in transaction"); + strncat(activity, "idle in transaction", remain); + set_ps_display(activity); pgstat_report_activity(STATE_IDLEINTRANSACTION, NULL); /* Start the idle-in-transaction timer */ @@ -5442,7 +5453,8 @@ PostgresMain(int argc, char *argv[], pgstat_report_stat(false); pgstat_report_queuestat(); - set_ps_display("idle"); + strncat(activity, "idle", remain); + set_ps_display(activity); pgstat_report_activity(STATE_IDLE, NULL); /* Start the idle-session timer */ diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 4b3168b533a..6163eff9454 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -1205,6 +1205,7 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username, ereport(FATAL, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("retrieve connection was not 
authenticated for unknown reason"))); + InitRetrieveCtl(); } /* diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 49e593586b1..538b209c31f 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -11963,10 +11963,10 @@ proname => 'get_ao_compression_ratio', provolatile => 'v', proparallel => 'u', prorettype => 'float8', proargtypes => 'regclass', prosrc => 'get_ao_compression_ratio', prodataaccess => 'r', proexeclocation => 'i' }, { oid => 7180, descr => 'endpoints information on the cluster visible to the user', - proname => 'gp_endpoints', prorows => '1000', proretset => 't', provolatile => 'v', proparallel => 'u', prorettype => 'record', proargtypes => '', proallargtypes => '{int4,text,text,int4,text,int4,oid,text,text}', proargmodes => '{o,o,o,o,o,o,o,o,o}', proargnames => '{dbid,auth_token,cursorname,sessionid,hostname,port,userid,state,endpointname}', prosrc => 'gp_endpoints', proexeclocation => 'c' }, + proname => 'gp_get_endpoints', prorows => '1000', proretset => 't', provolatile => 'v', proparallel => 'u', prorettype => 'record', proargtypes => '', proallargtypes => '{int4,text,text,int4,varchar,int4,text,text,text}', proargmodes => '{o,o,o,o,o,o,o,o,o}', proargnames => '{gp_segment_id,auth_token,cursorname,sessionid,hostname,port,username,state,endpointname}', prosrc => 'gp_get_endpoints', proexeclocation => 'c' }, { oid => 7181, descr => 'endpoints information on the segment visible to the user', - proname => 'gp_segment_endpoints', prorows => '1000', proretset => 't', provolatile => 'v', proparallel => 'u', prorettype => 'record', proargtypes => '', proallargtypes => '{text,int4,int4,int4,text,int4,int4,oid,text,text}', proargmodes => '{o,o,o,o,o,o,o,o,o,o}', proargnames => '{auth_token,databaseid,senderpid,receiverpid,state,dbid,sessionid,userid,endpointname,cursorname}', prosrc => 'gp_segment_endpoints' }, + proname => 'gp_get_segment_endpoints', prorows => '1000', proretset => 't', 
provolatile => 'v', proparallel => 'u', prorettype => 'record', proargtypes => '', proallargtypes => '{text,oid,int4,int4,text,int4,int4,text,text,text}', proargmodes => '{o,o,o,o,o,o,o,o,o,o}', proargnames => '{auth_token,databaseid,senderpid,receiverpid,state,gp_segment_id,sessionid,username,endpointname,cursorname}', prosrc => 'gp_get_segment_endpoints' }, { oid => 7182, descr => 'wait until all endpoint of this parallel retrieve cursor has been retrieved finished', proname => 'gp_wait_parallel_retrieve_cursor', provolatile => 'v', proparallel => 'u', prorettype => 'bool', proargtypes => 'text int4', proallargtypes => '{text,int4,bool}', proargmodes => '{i,i,o}', proargnames => '{cursorname,timeout_sec,finished}', prosrc => 'gp_wait_parallel_retrieve_cursor', proexeclocation => 'c' }, diff --git a/src/include/cdb/cdbdisp.h b/src/include/cdb/cdbdisp.h index 714a74e4589..9fac725c5fd 100644 --- a/src/include/cdb/cdbdisp.h +++ b/src/include/cdb/cdbdisp.h @@ -132,7 +132,7 @@ cdbdisp_waitDispatchFinish(struct CdbDispatcherState *ds); * 0 means checking immediately, and -1 means waiting until all ack * messages are received. * - * QEs should call cdbdisp_sendAckMessageToQD to send acknowledge messages to QD. + * QEs should call EndpointNotifyQD to send acknowledge messages to QD. */ bool cdbdisp_checkDispatchAckMessage(struct CdbDispatcherState *ds, const char *message, @@ -196,13 +196,6 @@ CdbDispatcherState * cdbdisp_makeDispatcherState(bool isExtendedQuery); */ void cdbdisp_destroyDispatcherState(CdbDispatcherState *ds); -/* - * cdbdisp_sendAckMessageToQD - send acknowledge message to QD (runs on QE). - * - * QD uses cdbdisp_checkDispatchAckMessage() to wait QE acknowledge message. 
- */
-void cdbdisp_sendAckMessageToQD(const char *message);
-
 void
 cdbdisp_makeDispatchParams(CdbDispatcherState *ds,
 						   int maxSlices,
diff --git a/src/include/cdb/cdbendpoint.h b/src/include/cdb/cdbendpoint.h
index f98c374c649..e7bc465158f 100644
--- a/src/include/cdb/cdbendpoint.h
+++ b/src/include/cdb/cdbendpoint.h
@@ -48,17 +48,36 @@ enum EndPointExecPosition
 #define STR_ENDPOINT_STATE_FINISHED "FINISHED"
 #define STR_ENDPOINT_STATE_RELEASED "RELEASED"
 
+/* ACK NOTICE MESSAGE FROM ENDPOINT QE/Entry DB to QD */
+#define ENDPOINT_READY_ACK_MSG "ENDPOINT_READY"
+#define ENDPOINT_FINISHED_ACK_MSG "ENDPOINT_FINISHED"
+
 /*
  * Endpoint attach status, used by parallel retrieve cursor.
  */
 typedef enum EndpointState
 {
-	ENDPOINTSTATE_INVALID,
-	ENDPOINTSTATE_READY,
-	ENDPOINTSTATE_RETRIEVING,
-	ENDPOINTSTATE_ATTACHED,
-	ENDPOINTSTATE_FINISHED,
-	ENDPOINTSTATE_RELEASED,
+	ENDPOINTSTATE_INVALID,	/* The initial state of an endpoint. */
+	ENDPOINTSTATE_READY,	/* After retrieve cursor is declared and endpoint
+							 * is allocated. */
+	ENDPOINTSTATE_RETRIEVING,/* When a retrieve statement begins to retrieve
+							 * tuples from an endpoint, this state may be
+							 * transformed from READY or ATTACHED.
+							 *
+							 * READY->RETRIEVING: if this is the first retrieve statement.
+							 * ATTACHED->RETRIEVING: if this is not the first retrieve statement.
+							 */
+	ENDPOINTSTATE_ATTACHED,	/* After a retrieve statement is executed and not
+							 * all tuples are retrieved.
+							 *
+							 * RETRIEVING-->ATTACHED
+							 */
+	ENDPOINTSTATE_FINISHED,	/* After a retrieve statement is executed and all
+							 * tuples are retrieved.
+							 *
+							 * RETRIEVING-->FINISHED
+							 */
+	ENDPOINTSTATE_RELEASED,	/* Retrieve role exits with error. */
 } EndpointState;
 
 /*
@@ -88,14 +107,14 @@ struct EndpointData
 	 * DSM (see session.c).
*/ }; -typedef struct EndpointData *Endpoint; +typedef struct EndpointData Endpoint; /* * The state information for parallel retrieve cursor */ typedef struct EndpointExecState { - Endpoint endpoint; /* endpoint entry */ + Endpoint *endpoint; /* endpoint entry */ DestReceiver *dest; dsm_segment *dsmSeg; /* dsm_segment pointer */ } EndpointExecState; @@ -110,27 +129,29 @@ extern Size EndpointShmemSize(void); extern void EndpointShmemInit(void); /* - * Below functions should run on dispatcher. + * Below functions should run on the QD. */ extern enum EndPointExecPosition GetParallelCursorEndpointPosition(PlannedStmt *plan); extern void WaitEndpointsReady(EState *estate); extern void AtAbort_EndpointExecState(void); -extern EndpointExecState *allocEndpointExecState(void); +extern void allocEndpointExecState(void); /* * Below functions should run on Endpoints(QE/Entry DB). */ -extern void SetupEndpointExecState(TupleDesc tupleDesc, - const char *cursorName, EndpointExecState *state); -extern void DestroyEndpointExecState(EndpointExecState *state); +extern void SetupEndpointExecState(TupleDesc tupleDesc, const char *cursorName, CmdType operation, DestReceiver **endpointDest); +extern void DestroyEndpointExecState(void); +extern void EndpointNotifyQD(const char *message); /* cdbendpointretrieve.c */ /* * Below functions should run on the retrieve backend. 
*/ +extern void InitRetrieveCtl(void); extern bool AuthEndpoint(Oid userID, const char *tokenStr); extern TupleDesc GetRetrieveStmtTupleDesc(const RetrieveStmt *stmt); extern void ExecRetrieveStmt(const RetrieveStmt *stmt, DestReceiver *dest); +extern void generate_endpoint_name(char *name, const char *cursorName); #endif /* CDBENDPOINT_H */ diff --git a/src/include/commands/async.h b/src/include/commands/async.h index 85bfb247682..e35f75e34d4 100644 --- a/src/include/commands/async.h +++ b/src/include/commands/async.h @@ -50,5 +50,6 @@ extern void HandleNotifyInterrupt(void); /* process interrupts */ extern void ProcessNotifyInterrupt(bool flush); +extern void NotifyMyFrontEnd(const char *channel, const char *payload, int32 srcPid); #endif /* ASYNC_H */ diff --git a/src/test/examples/test_parallel_retrieve_cursor_nowait.c b/src/test/examples/test_parallel_retrieve_cursor_nowait.c index d76f31bc19c..cfb64f0fb91 100644 --- a/src/test/examples/test_parallel_retrieve_cursor_nowait.c +++ b/src/test/examples/test_parallel_retrieve_cursor_nowait.c @@ -248,13 +248,13 @@ main(int argc, char **argv) /* * get the endpoints info of this PARALLEL RETRIEVE CURSOR */ - const char *sql1 = "select hostname,port,auth_token,endpointname from gp_endpoints() where cursorname='myportal';"; + const char *sql1 = "select hostname,port,auth_token,endpointname from gp_get_endpoints() where cursorname='myportal';"; printf("\nExec SQL on Master:\n\t> %s\n", sql1); res1 = PQexec(master_conn, sql1); if (PQresultStatus(res1) != PGRES_TUPLES_OK) { - fprintf(stderr, "select gp_endpoints view didn't return tuples properly\n"); + fprintf(stderr, "select gp_get_endpoints view didn't return tuples properly\n"); PQclear(res1); goto LABEL_ERR; } @@ -263,7 +263,7 @@ main(int argc, char **argv) if (ntup <= 0) { - fprintf(stderr, "select gp_endpoints view doesn't return rows\n"); + fprintf(stderr, "select gp_get_endpoints view doesn't return rows\n"); goto LABEL_ERR; } diff --git 
a/src/test/examples/test_parallel_retrieve_cursor_wait.c b/src/test/examples/test_parallel_retrieve_cursor_wait.c index 45c5fd2c321..be967d1fc2b 100644 --- a/src/test/examples/test_parallel_retrieve_cursor_wait.c +++ b/src/test/examples/test_parallel_retrieve_cursor_wait.c @@ -211,13 +211,13 @@ main(int argc, char **argv) /* * get the endpoints info of this PARALLEL RETRIEVE CURSOR */ - const char *sql1 = "select hostname,port,auth_token,endpointname from gp_endpoints() where cursorname='myportal';"; + const char *sql1 = "select hostname,port,auth_token,endpointname from gp_get_endpoints() where cursorname='myportal';"; printf("\nExec SQL on Master:\n\t> %s\n", sql1); res1 = PQexec(master_conn, sql1); if (PQresultStatus(res1) != PGRES_TUPLES_OK) { - fprintf(stderr, "select gp_endpoints view didn't return tuples properly\n"); + fprintf(stderr, "select gp_get_endpoints view didn't return tuples properly\n"); PQclear(res1); goto LABEL_ERR; } @@ -226,7 +226,7 @@ main(int argc, char **argv) if (ntup <= 0) { - fprintf(stderr, "select gp_endpoints view doesn't return rows\n"); + fprintf(stderr, "select gp_get_endpoints view doesn't return rows\n"); goto LABEL_ERR; } diff --git a/src/test/isolation2/.gitignore b/src/test/isolation2/.gitignore index dfdc927f342..6c1667f11c8 100644 --- a/src/test/isolation2/.gitignore +++ b/src/test/isolation2/.gitignore @@ -11,6 +11,8 @@ sql/fts_manual_probe.sql expected/fts_manual_probe.out sql/workfile_mgr_test.sql expected/workfile_mgr_test.out +expected/idle_gang_cleaner.out +sql/idle_gang_cleaner.sql -- ignore generated pg_basebackup tests /sql/pg_basebackup*.sql diff --git a/src/test/isolation2/Makefile b/src/test/isolation2/Makefile index 0d0ad9bca28..e8d112e7aae 100644 --- a/src/test/isolation2/Makefile +++ b/src/test/isolation2/Makefile @@ -56,7 +56,7 @@ pg_isolation2_regress$(X): isolation2_main.o pg_regress.o submake-libpq submake- clean distclean: rm -f pg_isolation2_regress$(X) $(OBJS) isolation2_main.o rm -f 
isolation2_regress.so - rm -f pg_regress.o + rm -f pg_regress.o test_parallel_retrieve_cursor_extended_query test_parallel_retrieve_cursor_extended_query_error rm -f gpstringsubs.pl gpdiff.pl atmsort.pm explain.pm rm -f data rm -rf $(pg_regress_clean_files) diff --git a/src/test/isolation2/init_file_parallel_retrieve_cursor b/src/test/isolation2/init_file_parallel_retrieve_cursor index e5db0ff2989..b2430eafea2 100644 --- a/src/test/isolation2/init_file_parallel_retrieve_cursor +++ b/src/test/isolation2/init_file_parallel_retrieve_cursor @@ -3,6 +3,10 @@ m/^ERROR: .* (seg\d+ [0-9.]+:\d+ pid=\d+)/ s/seg\d+ [0-9.]+:\d+ pid=\d+/SEG IP:PORT pid=PID/ +# Ignore session id in fatal error message +m/for session id -?[0-9]+/ +s/for session id -?[0-9]+/for session id xxx/ + # skip specific PID in: ERROR: end point token_id3 already attached by receiver(pid: 50938) m/^ERROR:.*\(pid: \d+\)/ s/\(pid: \d+\)/\(pid: PID\)/ diff --git a/src/test/isolation2/input/parallel_retrieve_cursor/corner.source b/src/test/isolation2/input/parallel_retrieve_cursor/corner.source index 7c4c35738e3..7961e1d9f6a 100644 --- a/src/test/isolation2/input/parallel_retrieve_cursor/corner.source +++ b/src/test/isolation2/input/parallel_retrieve_cursor/corner.source @@ -18,12 +18,12 @@ INSERT INTO t5 SELECT GENERATE_SERIES(1, 10); -- Test1: close not executed PARALLEL RETRIEVE CURSOR 1: BEGIN; 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -1: @post_run 'parse_endpoint_info 1 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 1 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; 1: CLOSE c1; -- check no endpoint info -1: SELECT auth_token,state FROM gp_endpoints() WHERE cursorname='c1'; +1: SELECT auth_token,state FROM gp_get_endpoints() WHERE cursorname='c1'; -- check no token info on QE after close PARALLEL RETRIEVE CURSOR -*U: @pre_run 
'set_endpoint_variable @ENDPOINT1': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT1'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT1': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT1'; -- error out for closed cursor 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); @@ -32,12 +32,12 @@ INSERT INTO t5 SELECT GENERATE_SERIES(1, 10); -- test for a large table 1: BEGIN; 1: DECLARE c11 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t11; -1: @post_run 'parse_endpoint_info 11 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c11'; +1: @post_run 'parse_endpoint_info 11 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c11'; 1: CLOSE c11; -- check no endpoint info -1: SELECT auth_token,state FROM gp_endpoints() WHERE cursorname='c11'; +1: SELECT auth_token,state FROM gp_get_endpoints() WHERE cursorname='c11'; -- check no token info on QE after close PARALLEL RETRIEVE CURSOR -*U: @pre_run 'set_endpoint_variable @ENDPOINT11': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT11'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT11': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT11'; -- error out for closed cursor 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c11', -1); @@ -58,7 +58,7 @@ INSERT INTO t5 SELECT GENERATE_SERIES(1, 10); 1: DECLARE c11 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; 1: ROLLBACK; -- check no endpoint info -1: SELECT auth_token,state FROM gp_endpoints(); +1: SELECT auth_token,state FROM gp_get_endpoints(); -- Test3: execute non-existing PARALLEL RETRIEVE CURSOR 1: BEGIN; @@ -70,7 +70,7 @@ INSERT INTO t5 SELECT GENERATE_SERIES(1, 10); 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', 0); 1: ROLLBACK; -- check no endpoint info -1: SELECT auth_token,state FROM gp_endpoints(); +1: SELECT auth_token,state FROM gp_get_endpoints(); -- Test4: 
execute one of PARALLEL RETRIEVE CURSORs 1: BEGIN; @@ -85,21 +85,21 @@ INSERT INTO t5 SELECT GENERATE_SERIES(1, 10); 1: DECLARE c9 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; 1: DECLARE c10 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; 1: DECLARE c11 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c2'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c2'; -- test check and wait in normal way 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', 0); 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; *R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT2"; 1<: 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', 0); -- check all endpoint state -1: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; 1: ROLLBACK; -- check no endpoint info -1: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; -- cleanup retrieve connections *Rq: @@ -108,21 +108,21 @@ INSERT INTO t5 SELECT GENERATE_SERIES(1, 10); 1: DECLARE c1 CURSOR FOR SELECT * FROM t1; 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -- check no endpoint info -1: SELECT auth_token,state FROM gp_endpoints(); +1: SELECT auth_token,state FROM gp_get_endpoints(); 1: ROLLBACK; 1: BEGIN; 1: DECLARE c1 CURSOR FOR SELECT * FROM t1; 1: DECLARE c2 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT 
endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c2'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c2'; 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; *R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT2"; 1<: -- check no endpoint info -1: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; 1: ROLLBACK; -- cleanup retrieve connections *Rq: @@ -130,16 +130,16 @@ INSERT INTO t5 SELECT GENERATE_SERIES(1, 10); -- Test6: select order by limit 1: BEGIN; 1: DECLARE c2 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1 ORDER BY a LIMIT 10; -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c2'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c2'; -*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; *R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT2"; -- test check and wait after finished retrieving 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', 0); 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', -1); -- check no endpoint info -1: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; 1: ROLLBACK; -- 
cleanup retrieve connections *Rq: @@ -147,17 +147,17 @@ INSERT INTO t5 SELECT GENERATE_SERIES(1, 10); -- Test7: select order by limit 0 1: BEGIN; 1: DECLARE c2 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1 ORDER BY a LIMIT 0; -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c2'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c2'; 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', 0); 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; *R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT2"; 1<: 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', 0); -- check no endpoint info -1: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; 1: ROLLBACK; -- cleanup retrieve connections *Rq: @@ -165,17 +165,17 @@ INSERT INTO t5 SELECT GENERATE_SERIES(1, 10); -- Test8: select empty table 1: BEGIN; 1: DECLARE c2 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t2; -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c2'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c2'; 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', 0); 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT 
state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; *R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT2"; 1<: 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', 0); -- check no endpoint info -1: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; 1: ROLLBACK; -- cleanup retrieve connections *Rq: @@ -183,17 +183,17 @@ INSERT INTO t5 SELECT GENERATE_SERIES(1, 10); -- Test9: select table with text column 1: BEGIN; 1: DECLARE c2 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t3; -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c2'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c2'; 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', 0); 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; *R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT2"; 1<: 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', 0); -- check no endpoint info -1: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; 1: ROLLBACK; -- cleanup retrieve connections *Rq: @@ -201,17 +201,17 @@ INSERT INTO t5 SELECT GENERATE_SERIES(1, 10); -- Test10: select empty table with text column 1: BEGIN; 1: DECLARE c2 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t4; -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c2'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT 
endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c2'; 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', 0); 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; *R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT2"; 1<: 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', 0); -- check no endpoint info -1: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; 1: ROLLBACK; -- cleanup retrieve connections *Rq: @@ -219,17 +219,17 @@ INSERT INTO t5 SELECT GENERATE_SERIES(1, 10); -- Test11: endpoints on one segment. 1: BEGIN; 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1 WHERE a = 50; -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', 0); 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); -1U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +1U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; 1R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT2"; 1<: 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', 0); -- check no endpoint info -1: SELECT state FROM gp_endpoints() WHERE cursorname='c1'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c1'; 1: ROLLBACK; -- cleanup 
retrieve connections 1Rq: @@ -237,16 +237,16 @@ INSERT INTO t5 SELECT GENERATE_SERIES(1, 10); -- Test12: PARALLEL RETRIEVE CURSOR for aggregate function: sum 1: BEGIN; 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT SUM(a) FROM t1; -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', 0); 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); --1U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +-1U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; -1R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT2"; 1<: 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', 0); -1: SELECT state FROM gp_endpoints() WHERE cursorname='c1'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c1'; 1: ROLLBACK; -- cleanup retrieve connections -1Rq: @@ -254,14 +254,14 @@ INSERT INTO t5 SELECT GENERATE_SERIES(1, 10); -- Test13: PARALLEL RETRIEVE CURSOR for aggregate function: avg 1: BEGIN; 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT AVG(a) FROM t1; -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; --1U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +-1U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; -1R: @pre_run 
'set_endpoint_variable @ENDPOINT2': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT2"; 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', 0); 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); -1: SELECT state FROM gp_endpoints() WHERE cursorname='c1'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c1'; 1: ROLLBACK; -- cleanup retrieve connections -1Rq: @@ -269,14 +269,14 @@ INSERT INTO t5 SELECT GENERATE_SERIES(1, 10); -- Test14: PARALLEL RETRIEVE CURSOR for count(*) 1: BEGIN; 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT COUNT(*) FROM t1; -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); --1U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +-1U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; -1R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT2"; 1<: -1: SELECT state FROM gp_endpoints() WHERE cursorname='c1'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c1'; 1: ROLLBACK; -- cleanup retrieve connections -1Rq: @@ -284,13 +284,13 @@ INSERT INTO t5 SELECT GENERATE_SERIES(1, 10); -- Test15: PARALLEL RETRIEVE CURSOR for two tables' join; 1: BEGIN; 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1, t5 where t1.a = t5.b; -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; 1&: SELECT * FROM 
gp_wait_parallel_retrieve_cursor('c1', -1); *R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT2"; 1<: -1: SELECT state FROM gp_endpoints() WHERE cursorname='c1'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c1'; 1: ROLLBACK; -- cleanup retrieve connections *Rq: @@ -298,13 +298,13 @@ INSERT INTO t5 SELECT GENERATE_SERIES(1, 10); -- Test16: PARALLEL RETRIEVE CURSOR for the count of two tables' join; 1: BEGIN; 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT COUNT(*) FROM t1, t5 where t1.a = t5.b; -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); -1R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT2"; 1<: -1: SELECT state FROM gp_endpoints() WHERE cursorname='c1'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c1'; 1: ROLLBACK; -- cleanup retrieve connections -1Rq: @@ -312,23 +312,23 @@ INSERT INTO t5 SELECT GENERATE_SERIES(1, 10); -- Test17: re-execute a PARALLEL RETRIEVE CURSOR and retrieve in same sessions. 
1: BEGIN; 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * from t1; -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; *R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT2"; 1<: 1: CLOSE c1; 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * from t1; -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; *R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT2"; 1<: -1: SELECT state FROM gp_endpoints() WHERE cursorname='c1'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c1'; 1: ROLLBACK; -- cleanup retrieve connections *Rq: @@ -336,10 +336,10 @@ INSERT INTO t5 SELECT GENERATE_SERIES(1, 10); -- Test18: re-execute a PARALLEL RETRIEVE CURSOR and retrieve in different sessions. 
1: BEGIN; 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * from t1; -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; *R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT2"; 0Rq: @@ -349,14 +349,14 @@ INSERT INTO t5 SELECT GENERATE_SERIES(1, 10); 1<: 1: CLOSE c1; 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * from t1; -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; *R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT2"; 1<: -1: SELECT state FROM gp_endpoints() WHERE cursorname='c1'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c1'; 1: ROLLBACK; -- cleanup retrieve connections *Rq: @@ -365,9 +365,9 @@ INSERT INTO t5 SELECT GENERATE_SERIES(1, 10); 1: BEGIN; 1: SAVEPOINT s1; 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * from t1; -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT 
endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; 1: ROLLBACK TO s1; -1: SELECT state FROM gp_endpoints() WHERE cursorname='c1'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c1'; 1: ROLLBACK; @@ -384,11 +384,11 @@ INSERT INTO t5 SELECT GENERATE_SERIES(1, 10); 1q: 1: BEGIN; 1: DECLARE c21a PARALLEL RETRIEVE CURSOR FOR SELECT COUNT(*) from t1; -1: @post_run 'get_tuple_cell TOKEN21a 1 1 ; create_match_sub $TOKEN21a token21a' : SELECT auth_token FROM gp_endpoints() WHERE cursorname='c21a'; +1: @post_run 'get_tuple_cell TOKEN21a 1 1 ; create_match_sub $TOKEN21a token21a' : SELECT auth_token FROM gp_get_endpoints() WHERE cursorname='c21a'; -- Declare more cursors in the same session should not change the first one's token 1: DECLARE c21b PARALLEL RETRIEVE CURSOR FOR SELECT COUNT(*) from t1; 1: DECLARE c21c PARALLEL RETRIEVE CURSOR FOR SELECT * from t1; -1: SELECT auth_token FROM gp_endpoints() WHERE cursorname='c21a'; +1: SELECT auth_token FROM gp_get_endpoints() WHERE cursorname='c21a'; 1: COMMIT; 1q: *Rq: @@ -396,16 +396,16 @@ INSERT INTO t5 SELECT GENERATE_SERIES(1, 10); -- Test22: UDF plan should be able to run on entry db. 
1: BEGIN; 1: DECLARE c22 PARALLEL RETRIEVE CURSOR WITHOUT HOLD FOR SELECT * FROM generate_series(1,10); -1: @post_run 'parse_endpoint_info 22 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c22'; +1: @post_run 'parse_endpoint_info 22 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c22'; 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c22', 0); -*U: @pre_run 'set_endpoint_variable @ENDPOINT22': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT22'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT22': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT22'; *R: @pre_run 'set_endpoint_variable @ENDPOINT22': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT22"; -- test check and wait after finished retrieving 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c22', -1); -- check no endpoint info -1: SELECT state FROM gp_endpoints() WHERE cursorname='c22'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c22'; 1: ROLLBACK; 1q: -1Rq: @@ -415,16 +415,16 @@ INSERT INTO t5 SELECT GENERATE_SERIES(1, 10); -- Test23: Catalog scan plan should be able to run on entry db. 
1: BEGIN; 1: DECLARE c23 PARALLEL RETRIEVE CURSOR WITHOUT HOLD FOR SELECT relname FROM pg_class where relname='pg_class'; -1: @post_run 'parse_endpoint_info 23 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c23'; +1: @post_run 'parse_endpoint_info 23 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c23'; 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c23', 0); -*U: @pre_run 'set_endpoint_variable @ENDPOINT23': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT23'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT23': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT23'; *R: @pre_run 'set_endpoint_variable @ENDPOINT23': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT23"; -- test check and wait after finished retrieving 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c23', -1); -- check no endpoint info -1: SELECT state FROM gp_endpoints() WHERE cursorname='c23'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c23'; 1: ROLLBACK; -- cleanup retrieve connections *Rq: @@ -434,9 +434,9 @@ INSERT INTO t5 SELECT GENERATE_SERIES(1, 10); 1: DECLARE "x12345678901234567890123456789012345678901234567890123456789x" PARALLEL RETRIEVE CURSOR WITHOUT HOLD FOR SELECT * FROM t5; 1: DECLARE "x123456789012345678901234567890123456789012345678901234567890123456789x" PARALLEL RETRIEVE CURSOR WITHOUT HOLD FOR SELECT * FROM t5; 1: DECLARE "x1234567890123456789012345678901234567890123456789012345678901x" PARALLEL RETRIEVE CURSOR WITHOUT HOLD FOR SELECT * FROM t5; -1: @post_run 'parse_endpoint_info 24 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='x12345678901234567890123456789012345678901234567890123456789x'; -1: @post_run 'parse_endpoint_info 24_1 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE 
cursorname='x12345678901234567890123456789012345678901234567890123456789012'; -1: @post_run 'parse_endpoint_info 24_2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='x1234567890123456789012345678901234567890123456789012345678901x'; +1: @post_run 'parse_endpoint_info 24 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='x12345678901234567890123456789012345678901234567890123456789x'; +1: @post_run 'parse_endpoint_info 24_1 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='x12345678901234567890123456789012345678901234567890123456789012'; +1: @post_run 'parse_endpoint_info 24_2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='x1234567890123456789012345678901234567890123456789012345678901x'; *R: @pre_run 'set_endpoint_variable @ENDPOINT24': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT24"; *R: @pre_run 'set_endpoint_variable @ENDPOINT24_1': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT24_1"; *R: @pre_run 'set_endpoint_variable @ENDPOINT24_2': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT24_2"; @@ -453,7 +453,7 @@ INSERT INTO t5 SELECT GENERATE_SERIES(1, 10); -- Test26: Retrieve one endpoint and quit the session, then connect the segment again and retrieve twice. No crash should happen. 1: BEGIN; 1: DECLARE c2 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t5; -1: @post_run 'parse_endpoint_info 26 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c2'; +1: @post_run 'parse_endpoint_info 26 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c2'; 2R: @pre_run 'set_endpoint_variable @ENDPOINT26' : RETRIEVE ALL FROM ENDPOINT "@ENDPOINT26"; 2Rq: @@ -470,16 +470,16 @@ INSERT INTO t5 SELECT GENERATE_SERIES(1, 10); -- Test27: General locus should run on entry db. Test23 tested Entry locus. 
1: BEGIN; 1: DECLARE c27 PARALLEL RETRIEVE CURSOR FOR SELECT generate_series(1,10); -1: @post_run 'parse_endpoint_info 27 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c27'; +1: @post_run 'parse_endpoint_info 27 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c27'; 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c27', 0); -*U: @pre_run 'set_endpoint_variable @ENDPOINT27': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT27'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT27': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT27'; *R: @pre_run 'set_endpoint_variable @ENDPOINT27': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT27"; -- test check and wait after finished retrieving 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c27', -1); -- check no endpoint info -1: SELECT state FROM gp_endpoints() WHERE cursorname='c27'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c27'; -- cleanup retrieve connections *Rq: diff --git a/src/test/isolation2/input/parallel_retrieve_cursor/fault_inject.source b/src/test/isolation2/input/parallel_retrieve_cursor/fault_inject.source index c04643d14f8..4e222432f61 100644 --- a/src/test/isolation2/input/parallel_retrieve_cursor/fault_inject.source +++ b/src/test/isolation2/input/parallel_retrieve_cursor/fault_inject.source @@ -22,18 +22,18 @@ insert into t1 select generate_series(1,100); --should work as normal 1: BEGIN; 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -1: @post_run 'parse_endpoint_info 1 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 1 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); -*U: SELECT state FROM gp_segment_endpoints() WHERE 
cursorname='c1' or endpointname='DUMMYENDPOINTNAME'; +*U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1' or endpointname='DUMMYENDPOINTNAME'; *R: @pre_run 'set_endpoint_variable @ENDPOINT1': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT1"; 1<: 1: CLOSE c1; -- check no endpoint info -1: SELECT auth_token,state FROM gp_endpoints() WHERE cursorname='c1' or endpointname='DUMMYENDPOINTNAME'; +1: SELECT auth_token,state FROM gp_get_endpoints() WHERE cursorname='c1' or endpointname='DUMMYENDPOINTNAME'; -- check no token info on QE after close PARALLEL RETRIEVE CURSOR -*U: SELECT * FROM gp_segment_endpoints() WHERE cursorname='c1' or endpointname='DUMMYENDPOINTNAME'; +*U: SELECT * FROM gp_get_segment_endpoints() WHERE cursorname='c1' or endpointname='DUMMYENDPOINTNAME'; 1: ROLLBACK; @@ -43,23 +43,23 @@ insert into t1 select generate_series(1,100); 1: BEGIN; 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * from t1; -1: @post_run 'parse_endpoint_info 2 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); -0U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +0U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; 0R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT2"; 1<: -1U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +1U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; 1R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT2"; -2U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +2U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; 2R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT2"; 1<: 1: 
ROLLBACK; -1: SELECT auth_token,state FROM gp_endpoints() WHERE cursorname='c1'; +1: SELECT auth_token,state FROM gp_get_endpoints() WHERE cursorname='c1'; 1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'reset', 2); -- Test3: fault inject at the 5th time while retrieving tuples from endpoint @@ -68,23 +68,23 @@ insert into t1 select generate_series(1,100); 1: BEGIN; 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * from t1; -1: @post_run 'parse_endpoint_info 3 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 3 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); -1U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +1U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; 1R: @pre_run 'set_endpoint_variable @ENDPOINT3': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT3"; 1<: -0U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +0U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; 0R: @pre_run 'set_endpoint_variable @ENDPOINT3': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT3"; -2U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +2U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; 2R: @pre_run 'set_endpoint_variable @ENDPOINT3': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT3"; 1<: 1: ROLLBACK; -1: SELECT auth_token,state FROM gp_endpoints() WHERE cursorname='c1'; +1: SELECT auth_token,state FROM gp_get_endpoints() WHERE cursorname='c1'; 1: SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'reset', 3); -- Test4: error inject at the 5th time while retrieving tuples from endpoint @@ -92,18 +92,18 @@ insert into t1 select generate_series(1,100); 1: BEGIN; 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * from t1; -1: @post_run 'parse_endpoint_info 4 1 2 3 4' : SELECT 
endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 4 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); -1U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +1U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; 1R: @pre_run 'set_endpoint_variable @ENDPOINT4': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT4"; -0U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +0U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; 0R: @pre_run 'set_endpoint_variable @ENDPOINT4': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT4"; 1<: -2U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +2U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; 2R: @pre_run 'set_endpoint_variable @ENDPOINT4': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT4"; 1<: @@ -120,16 +120,16 @@ insert into t1 select generate_series(1,100); 1: BEGIN; 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * from t1; -1: @post_run 'parse_endpoint_info 5 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 5 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); -0U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +0U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; 0R&: @pre_run 'set_endpoint_variable @ENDPOINT5': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT5"; -2U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +2U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; 2R&: @pre_run 'set_endpoint_variable @ENDPOINT5': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT5"; -1U: SELECT state FROM gp_segment_endpoints() 
WHERE cursorname='c1'; +1U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; 1R: @pre_run 'set_endpoint_variable @ENDPOINT5': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT5"; SELECT gp_wait_until_triggered_fault('fetch_tuples_from_endpoint', 1, 2); @@ -156,15 +156,15 @@ SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'suspend', '', '', '', 5, 5 1: BEGIN; 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * from t1; -1: @post_run 'parse_endpoint_info 7 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 7 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; -0U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +0U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; 0R&: @pre_run 'set_endpoint_variable @ENDPOINT7': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT7"; -1U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +1U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; 1R&: @pre_run 'set_endpoint_variable @ENDPOINT7': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT7"; -2U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +2U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; 2R&: @pre_run 'set_endpoint_variable @ENDPOINT7': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT7"; 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', 0); @@ -212,16 +212,16 @@ SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'suspend', '', '', '', 800, 1: BEGIN; 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * from t2; -1: @post_run 'parse_endpoint_info 6 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 6 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', 
-1); -0U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +0U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; 0R&: @pre_run 'set_endpoint_variable @ENDPOINT6': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT6"; -2U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +2U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; 2R&: @pre_run 'set_endpoint_variable @ENDPOINT6': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT6"; -1U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +1U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; 1R: @pre_run 'set_endpoint_variable @ENDPOINT6': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT6"; 1<: diff --git a/src/test/isolation2/input/parallel_retrieve_cursor/privilege.source b/src/test/isolation2/input/parallel_retrieve_cursor/privilege.source index 66a4054a295..02a7bbead58 100644 --- a/src/test/isolation2/input/parallel_retrieve_cursor/privilege.source +++ b/src/test/isolation2/input/parallel_retrieve_cursor/privilege.source @@ -25,7 +25,7 @@ RESET SESSION AUTHORIZATION; 1: SELECT SESSION_USER, CURRENT_USER; 1: BEGIN; 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -1: @post_run 'parse_endpoint_info 1 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 1 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; 1: SET SESSION AUTHORIZATION u1; 1: SELECT SESSION_USER, CURRENT_USER; --- c2 is declared by u1 @@ -33,22 +33,22 @@ RESET SESSION AUTHORIZATION; --- c12 is declared by u1 on entry db 1: DECLARE c12 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM generate_series(1,10); --- u1 is able to see all endpoints created by himself. 
-1: SELECT DISTINCT(cursorname), usename FROM gp_endpoints() AS e, pg_user AS u where e.userid = u.usesysid; +1: SELECT DISTINCT(cursorname), username FROM gp_get_endpoints(); --- adminuser should be able to see all the endpoints declared by u1 with state READY 2: SET SESSION AUTHORIZATION adminuser; 2: SELECT SESSION_USER, CURRENT_USER; -2: @post_run 'parse_endpoint_info 2 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c2'; -2: @post_run 'parse_endpoint_info 12 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c12'; -2: SELECT DISTINCT(cursorname), usename FROM gp_endpoints() AS e, pg_user AS u where e.userid = u.usesysid; +2: @post_run 'parse_endpoint_info 2 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c2'; +2: @post_run 'parse_endpoint_info 12 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c12'; +2: SELECT DISTINCT(cursorname), username FROM gp_get_endpoints(); --- adminuser should be able to see the cursor state change to READY -2: SELECT auth_token, usename, state FROM gp_endpoints() endpoints, pg_user WHERE endpoints.userid = pg_user.usesysid order by usename; +2: SELECT auth_token, username, state FROM gp_get_endpoints() endpoints order by username; --- adminuser should be able to see all endpoints declared by u1 in utility mode 3: @pre_run 'export CURRENT_ENDPOINT_POSTFIX=1 ; export RETRIEVE_USER="adminuser"; echo $RAW_STR ' : SELECT 1; 0R: SELECT SESSION_USER, CURRENT_USER; -0U: SELECT auth_token, usename FROM gp_segment_endpoints() AS e, pg_user AS u where e.userid = u.usesysid; +0U: SELECT auth_token, username FROM gp_get_segment_endpoints(); 0R: @pre_run 'set_endpoint_variable @ENDPOINT1': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT1"; 0Rq: 3: @pre_run 'export CURRENT_ENDPOINT_POSTFIX=2 ; export RETRIEVE_USER="u1"; echo $RAW_STR ' : SELECT 1; 
@@ -59,10 +59,10 @@ RESET SESSION AUTHORIZATION; 1: SET ROLE uu1; 1: SELECT SESSION_USER, CURRENT_USER; 1: DECLARE c3 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -2: @post_run 'parse_endpoint_info 3 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c3'; +2: @post_run 'parse_endpoint_info 3 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c3'; --- uu1 can not see u1's endpoints. -1: SELECT DISTINCT(cursorname), usename FROM gp_endpoints() AS e, pg_user AS u where e.userid = u.usesysid; -2: SELECT DISTINCT(cursorname), usename FROM gp_endpoints() AS e, pg_user AS u where e.userid = u.usesysid; +1: SELECT DISTINCT(cursorname), username FROM gp_get_endpoints(); +2: SELECT DISTINCT(cursorname), username FROM gp_get_endpoints(); 3: @pre_run 'export RETRIEVE_USER="uu1"; echo $RAW_STR ' : SELECT 1; --- Login as uu1 and retrieve, only u1 can retrieve @@ -91,9 +91,9 @@ RESET SESSION AUTHORIZATION; 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); -2: @post_run 'parse_endpoint_info 40 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c0'; +2: @post_run 'parse_endpoint_info 40 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c0'; -2: @post_run 'parse_endpoint_info 4 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +2: @post_run 'parse_endpoint_info 4 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; --- adminuser should NOT be able to retrieve from other's PARALLEL RETRIEVE CURSOR 3: @pre_run 'export CURRENT_ENDPOINT_POSTFIX=40 ; export RETRIEVE_USER="adminuser"; echo $RAW_STR ' : SELECT 1; @@ -130,15 +130,15 @@ RESET SESSION AUTHORIZATION; 1: BEGIN; -- Used to let super login to 
retrieve session so then it can change user in session. 1: DECLARE c0 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -1: @post_run 'parse_endpoint_info 50 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c0'; +1: @post_run 'parse_endpoint_info 50 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c0'; 1: SET SESSION AUTHORIZATION u1; --- c4 is declared and executed by u1 1: DECLARE c4 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -1: @post_run 'parse_endpoint_info 5 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c4'; +1: @post_run 'parse_endpoint_info 5 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c4'; --- u2 is not able to see u1's endpoints on master 1: SET SESSION AUTHORIZATION u2; -1: SELECT * from gp_endpoints(); +1: SELECT * from gp_get_endpoints(); --- execute the cursor by u1 1: SET SESSION AUTHORIZATION u1; @@ -146,7 +146,7 @@ RESET SESSION AUTHORIZATION; --- u2 is not able to see u1's endpoints in RETRIEVE mode *R: @pre_run 'export CURRENT_ENDPOINT_POSTFIX=50 ; export RETRIEVE_USER="adminuser" ; echo $RAW_STR' : SET SESSION AUTHORIZATION u2; -*U: SELECT auth_token, usename FROM gp_segment_endpoints() AS e, pg_user AS u where e.userid = u.usesysid; +*U: SELECT auth_token, username FROM gp_get_segment_endpoints(); --- u2 is not able to retrieve from u1's endpoints in RETRIEVE mode *R: @pre_run 'set_endpoint_variable @ENDPOINT5': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT5"; diff --git a/src/test/isolation2/input/parallel_retrieve_cursor/replicated_table.source b/src/test/isolation2/input/parallel_retrieve_cursor/replicated_table.source index 1217d6c9517..5431e438241 100644 --- a/src/test/isolation2/input/parallel_retrieve_cursor/replicated_table.source +++ b/src/test/isolation2/input/parallel_retrieve_cursor/replicated_table.source @@ -7,12 +7,12 @@ 
insert into rt1 select generate_series(1,100); --------- Test1: Basic test for PARALLEL RETRIEVE CURSOR on replicated table -- Replicated table will execute on seg id: session_id % segment_number --- Declare a cursor and check gp_endpoints(), we can find out the real +-- Declare a cursor and check gp_get_endpoints(), we can find out the real -- segment id by joining gp_segment_configuration. This should equal to -- session_id % 3 (size of demo cluster). 1: BEGIN; 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM rt1; -1: SELECT sc.content = current_setting('gp_session_id')::int % 3 AS diff FROM gp_endpoints() ep, gp_segment_configuration sc WHERE ep.dbid = sc.dbid; +1: SELECT sc.content = current_setting('gp_session_id')::int % 3 AS diff FROM gp_get_endpoints() ep, gp_segment_configuration sc WHERE ep.gp_segment_id = sc.content; 1: ROLLBACK; 1q: @@ -34,17 +34,17 @@ insert into rt1 select generate_series(1,100); -- Just declare & CHECK PARALLEL RETRIEVE CURSORs in all segment_number (i.e. 3) sessions, -- so that there should have specific session: MOD(sessionid,3)=1; -- Get token only in specific session id and retrieve this token. 
-7: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE MOD(sessionid,3)=1 LIMIT 1; +7: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE MOD(sessionid,3)=1 LIMIT 1; 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); 2&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', -1); 3&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c3', -1); 4&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c4', -1); 5&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c5', -1); 6&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c6', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; *R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE 10 FROM ENDPOINT "@ENDPOINT2"; -- cancel all 6 sessions -7: select pg_cancel_backend(pid) from pg_stat_activity, gp_endpoints() where sess_id = sessionid AND (cursorname ='c1' or cursorname='c2' or cursorname='c3' or cursorname ='c4' or cursorname='c5' or cursorname='c6'); +7: select pg_cancel_backend(pid) from pg_stat_activity, gp_get_endpoints() where sess_id = sessionid AND (cursorname ='c1' or cursorname='c2' or cursorname='c3' or cursorname ='c4' or cursorname='c5' or cursorname='c6'); 1<: 2<: 3<: @@ -91,17 +91,17 @@ insert into rt1 select generate_series(1,100); -- Just declare & CHECK PARALLEL RETRIEVE CURSORs in all segment_number (i.e. 3) sessions, -- so that there should have specific session: MOD(sessionid,3)=1; -- Get token only in specific session id and retrieve this token. 
-7: @post_run 'parse_endpoint_info 3 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE MOD(sessionid,3)=1 LIMIT 1; +7: @post_run 'parse_endpoint_info 3 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE MOD(sessionid,3)=1 LIMIT 1; 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); 2&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', -1); 3&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c3', -1); 4&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c4', -1); 5&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c5', -1); 6&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c6', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT3': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT3'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT3': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT3'; *R: @pre_run 'set_endpoint_variable @ENDPOINT3': RETRIEVE 10 FROM ENDPOINT "@ENDPOINT3"; -- cancel all 6 sessions -7: select pg_cancel_backend(pid) from pg_stat_activity, gp_endpoints() where sess_id = sessionid AND (cursorname ='c1' or cursorname='c2' or cursorname='c3' or cursorname ='c4' or cursorname='c5' or cursorname='c6'); +7: select pg_cancel_backend(pid) from pg_stat_activity, gp_get_endpoints() where sess_id = sessionid AND (cursorname ='c1' or cursorname='c2' or cursorname='c3' or cursorname ='c4' or cursorname='c5' or cursorname='c6'); 1<: 2<: 3<: @@ -148,17 +148,17 @@ insert into rt1 select generate_series(1,100); -- Just declare & CHECK PARALLEL RETRIEVE CURSORs in all segment_number (i.e. 3) sessions, -- so that there should have specific session: MOD(sessionid,3)=1; -- Get token only in specific session id and retrieve this token. 
-7: @post_run 'parse_endpoint_info 4 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE MOD(sessionid,3)=1 LIMIT 1; +7: @post_run 'parse_endpoint_info 4 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE MOD(sessionid,3)=1 LIMIT 1; 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); 2&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', -1); 3&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c3', -1); 4&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c4', -1); 5&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c5', -1); 6&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c6', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT4': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT4'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT4': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT4'; *R: @pre_run 'set_endpoint_variable @ENDPOINT4': RETRIEVE 10 FROM ENDPOINT "@ENDPOINT4"; -- cancel all 6 sessions -7: select pg_cancel_backend(pid) from pg_stat_activity, gp_endpoints() where sess_id = sessionid AND (cursorname ='c1' or cursorname='c2' or cursorname='c3' or cursorname ='c4' or cursorname='c5' or cursorname='c6'); +7: select pg_cancel_backend(pid) from pg_stat_activity, gp_get_endpoints() where sess_id = sessionid AND (cursorname ='c1' or cursorname='c2' or cursorname='c3' or cursorname ='c4' or cursorname='c5' or cursorname='c6'); 1<: 2<: 3<: diff --git a/src/test/isolation2/input/parallel_retrieve_cursor/retrieve_quit_check.source b/src/test/isolation2/input/parallel_retrieve_cursor/retrieve_quit_check.source index 21e2a8a45ed..05c5926728a 100644 --- a/src/test/isolation2/input/parallel_retrieve_cursor/retrieve_quit_check.source +++ b/src/test/isolation2/input/parallel_retrieve_cursor/retrieve_quit_check.source @@ -10,9 +10,9 @@ insert into t1 select generate_series(1,100); 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; 
1: DECLARE c2 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; 1: DECLARE c3 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -1: @post_run 'parse_endpoint_info 1 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; -1: @post_run 'parse_endpoint_info 2 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c2'; -1: @post_run 'parse_endpoint_info 3 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c3'; +1: @post_run 'parse_endpoint_info 1 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c2'; +1: @post_run 'parse_endpoint_info 3 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c3'; -- in one retrieve session, retrieve multiple tokens (begin retrieving, finished retrieving, not yet retrieve) 0R: @pre_run 'set_endpoint_variable @ENDPOINT1' : RETRIEVE 10 FROM ENDPOINT "@ENDPOINT1"; @@ -25,9 +25,9 @@ insert into t1 select generate_series(1,100); 3R: @pre_run 'set_endpoint_variable @ENDPOINT2' : RETRIEVE ALL FROM ENDPOINT "@ENDPOINT2"; 3R: @pre_run 'set_endpoint_variable @ENDPOINT3' : RETRIEVE 10 FROM ENDPOINT "@ENDPOINT3"; -2: SELECT cursorname, state FROM gp_endpoints(); +2: SELECT cursorname, state FROM gp_get_endpoints(); -- verify endpoints on seg0 for c2 has been finishied -0U: SELECT cursorname,senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints(); +0U: SELECT cursorname,senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints(); -- quit the first retrieve session 0Rq: @@ -41,21 +41,21 @@ insert into t1 select generate_series(1,100); -- by this retrieve process should be cancelled. 
-- The endpoint on seg0 for c1 should firstly become to RELEASED (the retrieve process set it), -- and then was removed (during the endpoint QE cancelled) -2: SELECT cursorname, state FROM gp_endpoints(); +2: SELECT cursorname, state FROM gp_get_endpoints(); -- verify endpoints for c1 is gone -0U: SELECT cursorname, senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints(); +0U: SELECT cursorname, senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints(); -- Now check on c1 will trigger the error, all endpoints should be aborted since the transaction -- will be terminated. 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', 0); 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', 0); 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c3', 0); -2: SELECT cursorname, state FROM gp_endpoints(); +2: SELECT cursorname, state FROM gp_get_endpoints(); 1: END; -2: SELECT cursorname, state FROM gp_endpoints(); +2: SELECT cursorname, state FROM gp_get_endpoints(); -- quit all sessions 1q: diff --git a/src/test/isolation2/input/parallel_retrieve_cursor/retrieve_quit_wait.source b/src/test/isolation2/input/parallel_retrieve_cursor/retrieve_quit_wait.source index 7bf6329fd86..28d89356857 100644 --- a/src/test/isolation2/input/parallel_retrieve_cursor/retrieve_quit_wait.source +++ b/src/test/isolation2/input/parallel_retrieve_cursor/retrieve_quit_wait.source @@ -10,14 +10,14 @@ insert into t1 select generate_series(1,100); 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; 1: DECLARE c2 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; 1: DECLARE c3 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -1: @post_run 'parse_endpoint_info 1 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; -1: @post_run 'parse_endpoint_info 2 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c2'; -1: @post_run 'parse_endpoint_info 3 1 2 3 4' : SELECT 
endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c3'; +1: @post_run 'parse_endpoint_info 1 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c2'; +1: @post_run 'parse_endpoint_info 3 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c3'; -- Wait until the c2 has been fully retrieved 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', -1); -2: SELECT cursorname, state FROM gp_endpoints(); +2: SELECT cursorname, state FROM gp_get_endpoints(); -- in all retrieve sessions, retrieve multiple tokens (begin retrieving, finished retrieving, not yet retrieve) *R: @pre_run 'set_endpoint_variable @ENDPOINT1' : RETRIEVE 10 FROM ENDPOINT "@ENDPOINT1"; @@ -27,7 +27,7 @@ insert into t1 select generate_series(1,100); -- Retrieving on C2 finished. 1<: -0U: SELECT cursorname,senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints(); +0U: SELECT cursorname,senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints(); -- quit the retrieve session 0Rq: -- Now the interrupt is checked using WaitLatch() for time: WAIT_NORMAL_TIMEOUT, @@ -38,12 +38,12 @@ insert into t1 select generate_series(1,100); -- by this retrieve process should be cancelled. -- The endpoint on seg0 for c1 should firstly become to RELEASED (the retrieve process set it), -- and then was removed (during the endpoint QE cancelled) -2: SELECT cursorname, state FROM gp_endpoints(); +2: SELECT cursorname, state FROM gp_get_endpoints(); -- Since c1 has been only partially retrieved, an error will be raised when transaction ends. 
1: END; -2: SELECT cursorname, state FROM gp_endpoints(); +2: SELECT cursorname, state FROM gp_get_endpoints(); --------- Test2: test for wait for quit partially retrieving session will abort all endpoints in the transaction. @@ -51,9 +51,9 @@ insert into t1 select generate_series(1,100); 1: DECLARE c4 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; 1: DECLARE c5 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; 1: DECLARE c6 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -1: @post_run 'parse_endpoint_info 4 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c4'; -1: @post_run 'parse_endpoint_info 5 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c5'; -1: @post_run 'parse_endpoint_info 6 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c6'; +1: @post_run 'parse_endpoint_info 4 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c4'; +1: @post_run 'parse_endpoint_info 5 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c5'; +1: @post_run 'parse_endpoint_info 6 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c6'; -- Wait until retrieving session for c4 quits 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c4', -1); @@ -63,14 +63,14 @@ insert into t1 select generate_series(1,100); *R: @pre_run 'set_endpoint_variable @ENDPOINT5' : RETRIEVE ALL FROM ENDPOINT "@ENDPOINT5"; -- skip TOKEN3 in this session -0U: SELECT cursorname,senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints(); +0U: SELECT cursorname,senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints(); 0Rq: -- Since retrieving session abort, waiting should be interrupted. 1<: -- All endpoints should be removed since error happened. 
-2: SELECT cursorname, state FROM gp_endpoints(); +2: SELECT cursorname, state FROM gp_get_endpoints(); -- quit all sessions 1q: diff --git a/src/test/isolation2/input/parallel_retrieve_cursor/security.source b/src/test/isolation2/input/parallel_retrieve_cursor/security.source index cdb85ed9577..533afe8dac4 100644 --- a/src/test/isolation2/input/parallel_retrieve_cursor/security.source +++ b/src/test/isolation2/input/parallel_retrieve_cursor/security.source @@ -16,7 +16,7 @@ $$ LANGUAGE SQL; -- Test: Declare a cursor 1: BEGIN; 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -1: @post_run 'parse_endpoint_info 1 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 1 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; -- Test: Should not change gp_role in retrieve mode 0R: set gp_role to 'utility'; diff --git a/src/test/isolation2/input/parallel_retrieve_cursor/special_query.source b/src/test/isolation2/input/parallel_retrieve_cursor/special_query.source index f86d8336dca..a6e5ae7426b 100644 --- a/src/test/isolation2/input/parallel_retrieve_cursor/special_query.source +++ b/src/test/isolation2/input/parallel_retrieve_cursor/special_query.source @@ -11,12 +11,12 @@ SELECT make_record(x) FROM t1; 1: BEGIN; 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT make_record(x) FROM t1; -1: @post_run 'parse_endpoint_info 1 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 1 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', 0); 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT1': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT1'; +*U: @pre_run 
'set_endpoint_variable @ENDPOINT1': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT1'; *R: @pre_run 'set_endpoint_variable @ENDPOINT1': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT1"; 1<: @@ -45,7 +45,7 @@ SET gp_interconnect_queue_depth=1; 2: BEGIN; 2: SHOW gp_max_packet_size; 2: DECLARE c2 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t2 join t2 t12 on true; -2: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; 2: CLOSE c2; 2: END; diff --git a/src/test/isolation2/input/parallel_retrieve_cursor/status_check.source b/src/test/isolation2/input/parallel_retrieve_cursor/status_check.source index a5f176c0f59..92719a0c9ac 100644 --- a/src/test/isolation2/input/parallel_retrieve_cursor/status_check.source +++ b/src/test/isolation2/input/parallel_retrieve_cursor/status_check.source @@ -1,4 +1,4 @@ --- @Description Tests the state for pg_endpoints AND gp_segment_endpoints(), focus in nowait mode +-- @Description Tests the state for pg_endpoints AND gp_get_segment_endpoints(), focus in nowait mode -- need to fault injection to gp_wait_parallel_retrieve_cursor() -- DROP TABLE IF EXISTS t1; @@ -8,18 +8,18 @@ insert into t1 select generate_series(1,100); --------- Test1: Basic test for parallel retrieve interface & close cursor 1: BEGIN; 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -1: @post_run 'parse_endpoint_info 1 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 1 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', 0); -*U: @pre_run 'set_endpoint_variable @ENDPOINT1': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT1'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT1': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT1'; *R: 
@pre_run 'set_endpoint_variable @ENDPOINT1': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT1"; 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', 0); 1: CLOSE c1; -- check no endpoint info -1: SELECT auth_token,state FROM gp_endpoints() WHERE cursorname='c1'; +1: SELECT auth_token,state FROM gp_get_endpoints() WHERE cursorname='c1'; -- check no token info on QE after close PARALLEL RETRIEVE CURSOR -*U: SELECT * FROM gp_segment_endpoints() WHERE cursorname='c1'; +*U: SELECT * FROM gp_get_segment_endpoints() WHERE cursorname='c1'; -- error out for closed cursor 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', 0); @@ -28,33 +28,33 @@ insert into t1 select generate_series(1,100); ---------- Test2: enhanced test for parallel retrieve interface state & cursor auto closed when transaction closed 1: BEGIN; 1: DECLARE c2 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -1: @post_run 'parse_endpoint_info 2 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c2'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c2'; -- test RETRIEVE success on seg1 0R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE 10 FROM ENDPOINT "@ENDPOINT2"; 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', 0); -- check initial state after "CHECK PARALLEL RETRIEVE CURSOR" -2: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c2'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c2'; -- check state if some endpoint retrieve partial results, some endpoint finished retrieving, some endpoint not start retrieving 0R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE 10 FROM ENDPOINT "@ENDPOINT2"; 1R: @pre_run 'set_endpoint_variable 
@ENDPOINT2': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT2"; -2: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c2'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c2'; -- return 0 row instead of reporting error if finished retrieving data from this endpoint, while other endpoint have not finished retrieving. 1R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT2"; -- finished retrieving all endpoints and check state *R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT2"; -2: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c2'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c2'; 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', 0); 1: COMMIT; -- check the cursor auto closed when transaction closed -- check no endpoint info -1: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; -- check no token info on QE after close PARALLEL RETRIEVE CURSOR -*U: SELECT * FROM gp_segment_endpoints() WHERE cursorname='c2'; +*U: SELECT * FROM gp_get_segment_endpoints() WHERE cursorname='c2'; -- error out for closed cursor 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', 0); @@ -63,7 +63,7 @@ insert into t1 select generate_series(1,100); ---------- Test3: 2 retrieving sessions connect to the same endpoint report error & cancel QE exec backend 1: BEGIN; 1: DECLARE c3 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -1: @post_run 'parse_endpoint_info 3 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state 
FROM gp_endpoints() WHERE cursorname='c3'; +1: @post_run 'parse_endpoint_info 3 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c3'; 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c3', 0); 0R: @pre_run 'set_endpoint_variable @ENDPOINT3': RETRIEVE 10 FROM ENDPOINT "@ENDPOINT3"; -- a new retrieve session should report error @@ -73,33 +73,33 @@ insert into t1 select generate_series(1,100); -- some endpoint retrieve partial results, some endpoint finished retrieving, some endpoint not start retrieving 1R: @pre_run 'set_endpoint_variable @ENDPOINT3': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT3"; -- get senderpid which is endpoint execution backend -0U: @post_run 'get_tuple_cell PID31 1 1 ; create_sub "$PID31[ \t]*" senderpid31': SELECT senderpid, receiverpid<>-1, state FROM gp_segment_endpoints(); +0U: @post_run 'get_tuple_cell PID31 1 1 ; create_sub "$PID31[ \t]*" senderpid31': SELECT senderpid, receiverpid<>-1, state FROM gp_get_segment_endpoints(); -- run 'kill -s INT senderpid' to cancel the endpoint execution backend, retrieve session still can work 42: @pre_run 'kill -s INT ${PID31} && echo "${RAW_STR}" ': SELECT 1; -- check it can cancel the "gp_wait_parallel_retrieve_cursor" 1: SELECT pg_sleep(0.4); -2: SELECT state FROM gp_endpoints() WHERE cursorname='c3'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c3'; 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c3', 0); -- check no endpoint info left -2: SELECT state FROM gp_endpoints() WHERE cursorname='c3'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c3'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c3'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c3'; -- report error for EXECUTE canceled PARALLEL RETRIEVE CURSOR 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c3', 0); 1: ROLLBACK; -- check no endpoint info -2: SELECT state 
FROM gp_endpoints() WHERE cursorname='c3'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c3'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c3'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c3'; ---------- Test4: terminate (using signal QUIT) QE exec backend 1: BEGIN; 1: DECLARE c4 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -1: @post_run 'parse_endpoint_info 4 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c4'; +1: @post_run 'parse_endpoint_info 4 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c4'; 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c4', 0); -- some endpoint retrieve partial results, some endpoint finished retrieving, some endpoint not start retrieving 0R: @pre_run 'set_endpoint_variable @ENDPOINT4': RETRIEVE 10 FROM ENDPOINT "@ENDPOINT4"; 1R: @pre_run 'set_endpoint_variable @ENDPOINT4': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT4"; -- get senderpid which is endpoint execution backend -0U: @post_run 'get_tuple_cell PID41 1 1 ; create_sub "${PID41}[ \t]*" senderpid41': SELECT senderpid, receiverpid<>-1, state FROM gp_segment_endpoints(); +0U: @post_run 'get_tuple_cell PID41 1 1 ; create_sub "${PID41}[ \t]*" senderpid41': SELECT senderpid, receiverpid<>-1, state FROM gp_get_segment_endpoints(); -- run 'kill -s QUIT senderpid' to cancel the endpoint execution backend, retrieve session still can work 42: @pre_run 'kill -s QUIT ${PID41} && echo "${RAW_STR}" ': SELECT 1; -- exit this session because the connection closed, so that it will re-connect next time use this session. 
@@ -107,24 +107,24 @@ insert into t1 select generate_series(1,100); 0Uq: -- check it can cancel the "gp_wait_parallel_retrieve_cursor" 1: SELECT pg_sleep(0.4); -2: SELECT state FROM gp_endpoints() WHERE cursorname='c4'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c4'; 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c4', 0); -- check no endpoint info left 2q: -2: SELECT state FROM gp_endpoints() WHERE cursorname='c4'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c4'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c4'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c4'; -- report error for EXECUTE canceled PARALLEL RETRIEVE CURSOR 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c4', 0); 1: ROLLBACK; -- check no endpoint info -2: SELECT state FROM gp_endpoints() WHERE cursorname='c4'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c4'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c4'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c4'; 2Rq: ---------- Test5: terminate (using signal TERM) QE exec backend 1: BEGIN; 1: DECLARE c5 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -1: @post_run 'parse_endpoint_info 5 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c5'; +1: @post_run 'parse_endpoint_info 5 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c5'; 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c5', 0); -- some endpoint retrieve partial results, some endpoint finished retrieving, some endpoint not start retrieving 0R: @pre_run 'set_endpoint_variable @ENDPOINT5': RETRIEVE 10 FROM ENDPOINT "@ENDPOINT5"; @@ -134,22 +134,22 @@ insert into t1 select generate_series(1,100); 1Rq: 1R: @pre_run 'set_endpoint_variable 
@ENDPOINT5': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT5"; -- get senderpid which is endpoint execution backend -0U: @post_run 'get_tuple_cell PID51 1 1 ; create_sub "${PID51}[ \t]*" senderpid51': SELECT senderpid, receiverpid<>-1, state FROM gp_segment_endpoints(); +0U: @post_run 'get_tuple_cell PID51 1 1 ; create_sub "${PID51}[ \t]*" senderpid51': SELECT senderpid, receiverpid<>-1, state FROM gp_get_segment_endpoints(); -- run 'kill -s TERM senderpid' to cancel the endpoint execution backend, retrieve session still can work 42: @pre_run 'kill -s TERM ${PID51} && echo "${RAW_STR}" ': SELECT 1; -- check it can cancel the "gp_wait_parallel_retrieve_cursor" 1: SELECT pg_sleep(0.4); -2: SELECT state FROM gp_endpoints() WHERE cursorname='c5'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c5'; 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c5', 0); -- check no endpoint info left -2: SELECT state FROM gp_endpoints() WHERE cursorname='c5'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c5'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c5'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c5'; -- report error for EXECUTE canceled PARALLEL RETRIEVE CURSOR 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c5', 0); 1: ROLLBACK; -- check no endpoint info -2: SELECT state FROM gp_endpoints() WHERE cursorname='c5'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c5'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c5'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c5'; ---------- Test6: Cancel (using signal INT) the process of 'CHECK PARALLEL RETRIEVE CURSOR' -- faul injection on QD @@ -157,7 +157,7 @@ insert into t1 select generate_series(1,100); 1: SELECT gp_inject_fault('gp_wait_parallel_retrieve_cursor_after_udf', 'sleep', '', '', '', 1, 
1, 1, 1::smallint); 1: BEGIN; 1: DECLARE c6 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -1: @post_run 'parse_endpoint_info 6 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c6'; +1: @post_run 'parse_endpoint_info 6 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c6'; -- get backend pid of this session which run 'gp_wait_parallel_retrieve_cursor' 1: @post_run 'get_tuple_cell PID61 1 1 ; create_sub "${PID61}[ \t]*" QDPid61': select pg_backend_pid(); -- some endpoint retrieve partial results, some endpoint finished retrieving, some endpoint not start retrieving @@ -170,14 +170,14 @@ insert into t1 select generate_series(1,100); -- check it can cancel the "gp_wait_parallel_retrieve_cursor" 1<: -- check no endpoint info left -2: SELECT state FROM gp_endpoints() WHERE cursorname='c6'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c6'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c6'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c6'; -- report error for EXECUTE canceled PARALLEL RETRIEVE CURSOR 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c6', 0); 1: ROLLBACK; -- check no endpoint info -2: SELECT state FROM gp_endpoints() WHERE cursorname='c6'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c6'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c6'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c6'; ---------- Test6.1: Cancel (using signal INT) the process of 'CHECK PARALLEL RETRIEVE CURSOR' without rollback -- faul injection on QD @@ -185,7 +185,7 @@ insert into t1 select generate_series(1,100); 1: SELECT gp_inject_fault('gp_wait_parallel_retrieve_cursor_after_udf', 'sleep', '', '', '', 1, 1, 1, 1::smallint); 1: BEGIN; 1: DECLARE 
c61 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -1: @post_run 'parse_endpoint_info 61 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c61'; +1: @post_run 'parse_endpoint_info 61 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c61'; -- get backend pid of this session which run 'gp_wait_parallel_retrieve_cursor' 1: @post_run 'get_tuple_cell PID611 1 1 ; create_sub "${PID611}[ \t]*" QDPid611': select pg_backend_pid(); -- some endpoint retrieve partial results, some endpoint finished retrieving, some endpoint not start retrieving @@ -198,13 +198,13 @@ insert into t1 select generate_series(1,100); -- check it can cancel the "gp_wait_parallel_retrieve_cursor" 1<: -- check no endpoint info left -2: SELECT state FROM gp_endpoints() WHERE cursorname='c61'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c61'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c61'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c61'; -- quit the session of 'CHECK PARALLEL RETRIEVE CURSOR' and keep other session connected 1q: -- check no endpoint info -2: SELECT state FROM gp_endpoints() WHERE cursorname='c61'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c61'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c61'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c61'; 0Rq: 1Rq: @@ -213,13 +213,13 @@ insert into t1 select generate_series(1,100); 1: SELECT gp_inject_fault('gp_wait_parallel_retrieve_cursor_after_udf', 'sleep', '', '', '', 1, 1, 1, 1::smallint); 1: BEGIN; 1: DECLARE c7 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -1: @post_run 'parse_endpoint_info 7 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE 
cursorname='c7'; +1: @post_run 'parse_endpoint_info 7 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c7'; -- get backend pid of this session which run 'gp_wait_parallel_retrieve_cursor' 1: @post_run 'get_tuple_cell PID71 1 1 ; create_sub "${PID71}[ \t]*" QDPid71': select pg_backend_pid(); -- some endpoint retrieve partial results, some endpoint finished retrieving, some endpoint not start retrieving 0R: @pre_run 'set_endpoint_variable @ENDPOINT7': RETRIEVE 10 FROM ENDPOINT "@ENDPOINT7"; 1R: @pre_run 'set_endpoint_variable @ENDPOINT7': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT7"; -2U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c7'; +2U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c7'; -- run 'kill -s QUIT QDPid' to cancel the endpoint execution backend, retrieve session still can work -- here need to sleep sometime to wait for endpoint QE backend to detect QD connection lost. 
1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c7', 0); @@ -231,25 +231,25 @@ insert into t1 select generate_series(1,100); 2q: -1Uq: -- check no endpoint info left -2: SELECT state FROM gp_endpoints() WHERE cursorname='c7'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c7'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c7'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c7'; -- report error for EXECUTE canceled PARALLEL RETRIEVE CURSOR 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c7', -1); 1: ROLLBACK; -- check no endpoint info -2: SELECT state FROM gp_endpoints() WHERE cursorname='c7'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c7'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c7'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c7'; ---------- Test8: Status visibilities for different sessions 1: BEGIN; 1: DECLARE c8 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -1: @post_run 'get_tuple_cell SESSION81 1 1 ; create_match_sub_with_spaces $SESSION81 session81' : SELECT sessionid,state FROM gp_session_endpoints() WHERE cursorname='c8'; +1: @post_run 'get_tuple_cell SESSION81 1 1 ; create_match_sub_with_spaces $SESSION81 session81' : SELECT sessionid,state FROM gp_get_session_endpoints() WHERE cursorname='c8'; -- Session 2 can only see its own cursors by default. 2: BEGIN; 2: DECLARE c8 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -2: @post_run 'get_tuple_cell SESSION82 1 1 ; create_match_sub_with_spaces $SESSION82 session82' : SELECT sessionid,state FROM gp_session_endpoints() WHERE cursorname='c8'; --- Session 2 can see all cursors with gp_endpoints(). 
-2: SELECT sessionid,state FROM gp_endpoints() WHERE cursorname='c8' order by sessionid; +2: @post_run 'get_tuple_cell SESSION82 1 1 ; create_match_sub_with_spaces $SESSION82 session82' : SELECT sessionid,state FROM gp_get_session_endpoints() WHERE cursorname='c8'; +-- Session 2 can see all cursors with gp_get_endpoints(). +2: SELECT sessionid,state FROM gp_get_endpoints() WHERE cursorname='c8' order by sessionid; 1: CLOSE c8; 1: END; diff --git a/src/test/isolation2/input/parallel_retrieve_cursor/status_wait.source b/src/test/isolation2/input/parallel_retrieve_cursor/status_wait.source index e1371aea5e8..2e2294bdd65 100644 --- a/src/test/isolation2/input/parallel_retrieve_cursor/status_wait.source +++ b/src/test/isolation2/input/parallel_retrieve_cursor/status_wait.source @@ -1,4 +1,4 @@ --- @Description Tests the state for pg_endpoints AND gp_segment_endpoints(), focus in wait mode +-- @Description Tests the state for pg_endpoints AND gp_get_segment_endpoints(), focus in wait mode -- DROP TABLE IF EXISTS t1; CREATE TABLE t1 (a INT) DISTRIBUTED by (a); @@ -7,18 +7,18 @@ insert into t1 select generate_series(1,100); --------- Test1: Basic test for parallel retrieve interface & close cursor 1: BEGIN; 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -1: @post_run 'parse_endpoint_info 1 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 1 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT1': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT1'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT1': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT1'; *R: @pre_run 'set_endpoint_variable @ENDPOINT1': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT1"; 1<: 1: CLOSE c1; -- check 
no endpoint info -1: SELECT auth_token,state FROM gp_endpoints() WHERE cursorname='c1'; +1: SELECT auth_token,state FROM gp_get_endpoints() WHERE cursorname='c1'; -- check no token info on QE after close PARALLEL RETRIEVE CURSOR -*U: SELECT * FROM gp_segment_endpoints() WHERE cursorname='c1'; +*U: SELECT * FROM gp_get_segment_endpoints() WHERE cursorname='c1'; -- error out for closed cursor 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', 0); @@ -27,33 +27,33 @@ insert into t1 select generate_series(1,100); ---------- Test2: enhanced test for parallel retrieve interface state & cursor auto closed when transaction closed 1: BEGIN; 1: DECLARE c2 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -1: @post_run 'parse_endpoint_info 2 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c2'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c2'; -- test RETRIEVE success on seg1 0R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE 10 FROM ENDPOINT "@ENDPOINT2"; 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', -1); -- check initial state after "CHECK PARALLEL RETRIEVE CURSOR" -2: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c2'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c2'; -- check state if some endpoint retrieve partial results, some endpoint finished retrieving, some endpoint not start retrieving 0R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE 10 FROM ENDPOINT "@ENDPOINT2"; 1R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT2"; -2: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM 
gp_segment_endpoints() WHERE cursorname='c2'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c2'; -- return 0 row instead of reporting error if finished retrieving data from this endpoint, while other endpoint have not finished retrieving. 1R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT2"; -- finished retrieving all endpoints and check state *R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT2"; -2: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c2'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c2'; 1<: 1: COMMIT; -- check the cursor auto closed when transaction closed -- check no endpoint info -1: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; -- check no token info on QE after close PARALLEL RETRIEVE CURSOR -*U: SELECT * FROM gp_segment_endpoints() WHERE cursorname='c2'; +*U: SELECT * FROM gp_get_segment_endpoints() WHERE cursorname='c2'; -- error out for closed cursor 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', 0); @@ -62,7 +62,7 @@ insert into t1 select generate_series(1,100); ---------- Test3: 2 retrieving sessions connect to the same endpoint report error & cancel QE exec backend 1: BEGIN; 1: DECLARE c3 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -1: @post_run 'parse_endpoint_info 3 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c3'; +1: @post_run 'parse_endpoint_info 3 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c3'; 1&: SELECT * FROM 
gp_wait_parallel_retrieve_cursor('c3', -1); 0R: @pre_run 'set_endpoint_variable @ENDPOINT3': RETRIEVE 10 FROM ENDPOINT "@ENDPOINT3"; -- a new retrieve session should report error @@ -72,31 +72,31 @@ insert into t1 select generate_series(1,100); -- some endpoint retrieve partial results, some endpoint finished retrieving, some endpoint not start retrieving 1R: @pre_run 'set_endpoint_variable @ENDPOINT3': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT3"; -- get senderpid which is endpoint execution backend -0U: @post_run 'get_tuple_cell PID31 1 1 ; create_sub "$PID31[ \t]*" senderpid31': SELECT senderpid, receiverpid<>-1, state FROM gp_segment_endpoints(); +0U: @post_run 'get_tuple_cell PID31 1 1 ; create_sub "$PID31[ \t]*" senderpid31': SELECT senderpid, receiverpid<>-1, state FROM gp_get_segment_endpoints(); -- run 'kill -s INT senderpid' to cancel the endpoint execution backend, retrieve session still can work 42: @pre_run 'kill -s INT ${PID31} && echo "${RAW_STR}" ': SELECT 1; -- check it can cancel the "gp_wait_parallel_retrieve_cursor" 1<: -- check no endpoint info left -2: SELECT state FROM gp_endpoints() WHERE cursorname='c3'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c3'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c3'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c3'; -- report error for EXECUTE canceled PARALLEL RETRIEVE CURSOR 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c3', -1); 1: ROLLBACK; -- check no endpoint info -2: SELECT state FROM gp_endpoints() WHERE cursorname='c3'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c3'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c3'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c3'; ---------- Test4: terminate (using signal QUIT) QE exec backend 1: BEGIN; 1: DECLARE c4 
PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -1: @post_run 'parse_endpoint_info 4 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c4'; +1: @post_run 'parse_endpoint_info 4 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c4'; 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c4', -1); -- some endpoint retrieve partial results, some endpoint finished retrieving, some endpoint not start retrieving 0R: @pre_run 'set_endpoint_variable @ENDPOINT4': RETRIEVE 10 FROM ENDPOINT "@ENDPOINT4"; 1R: @pre_run 'set_endpoint_variable @ENDPOINT4': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT4"; -- get senderpid which is endpoint execution backend -0U: @post_run 'get_tuple_cell PID41 1 1 ; create_sub "${PID41}[ \t]*" senderpid41': SELECT senderpid, receiverpid<>-1, state FROM gp_segment_endpoints(); +0U: @post_run 'get_tuple_cell PID41 1 1 ; create_sub "${PID41}[ \t]*" senderpid41': SELECT senderpid, receiverpid<>-1, state FROM gp_get_segment_endpoints(); -- run 'kill -s QUIT senderpid' to cancel the endpoint execution backend, retrieve session still can work 42: @pre_run 'kill -s QUIT ${PID41} && echo "${RAW_STR}" ': SELECT 1; -- exit this session because the connection closed, so that it will re-connect next time use this session. 
@@ -106,20 +106,20 @@ insert into t1 select generate_series(1,100); 1<: -- check no endpoint info left 2q: -2: SELECT state FROM gp_endpoints() WHERE cursorname='c4'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c4'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c4'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c4'; -- report error for EXECUTE canceled PARALLEL RETRIEVE CURSOR 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c4', -1); 1: ROLLBACK; -- check no endpoint info -2: SELECT state FROM gp_endpoints() WHERE cursorname='c4'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c4'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c4'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c4'; 2Rq: ---------- Test5: terminate (using signal TERM) QE exec backend 1: BEGIN; 1: DECLARE c5 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -1: @post_run 'parse_endpoint_info 5 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c5'; +1: @post_run 'parse_endpoint_info 5 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c5'; 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c5', -1); -- some endpoint retrieve partial results, some endpoint finished retrieving, some endpoint not start retrieving 0R: @pre_run 'set_endpoint_variable @ENDPOINT5': RETRIEVE 10 FROM ENDPOINT "@ENDPOINT5"; @@ -129,25 +129,25 @@ insert into t1 select generate_series(1,100); 1Rq: 1R: @pre_run 'set_endpoint_variable @ENDPOINT5': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT5"; -- get senderpid which is endpoint execution backend -0U: @post_run 'get_tuple_cell PID51 1 1 ; create_sub "${PID51}[ \t]*" senderpid51': SELECT senderpid, receiverpid<>-1, state FROM gp_segment_endpoints(); +0U: 
@post_run 'get_tuple_cell PID51 1 1 ; create_sub "${PID51}[ \t]*" senderpid51': SELECT senderpid, receiverpid<>-1, state FROM gp_get_segment_endpoints(); -- run 'kill -s TERM senderpid' to cancel the endpoint execution backend, retrieve session still can work 42: @pre_run 'kill -s TERM ${PID51} && echo "${RAW_STR}" ': SELECT 1; -- check it can cancel the "gp_wait_parallel_retrieve_cursor" 1<: -- check no endpoint info left -2: SELECT state FROM gp_endpoints() WHERE cursorname='c5'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c5'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c5'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c5'; -- report error for EXECUTE canceled PARALLEL RETRIEVE CURSOR 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c5', -1); 1: ROLLBACK; -- check no endpoint info -2: SELECT state FROM gp_endpoints() WHERE cursorname='c5'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c5'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c5'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c5'; ---------- Test6: Cancel (using signal INT) the process of 'CHECK PARALLEL RETRIEVE CURSOR' 1: BEGIN; 1: DECLARE c6 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -1: @post_run 'parse_endpoint_info 6 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c6'; +1: @post_run 'parse_endpoint_info 6 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c6'; -- get backend pid of this session which run 'gp_wait_parallel_retrieve_cursor' 1: @post_run 'get_tuple_cell PID61 1 1 ; create_sub "${PID61}[ \t]*" QDPid61': select pg_backend_pid(); 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c6', -1); @@ -159,19 +159,19 @@ insert into t1 select 
generate_series(1,100); -- check it can cancel the "gp_wait_parallel_retrieve_cursor" 1<: -- check no endpoint info left -2: SELECT state FROM gp_endpoints() WHERE cursorname='c6'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c6'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c6'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c6'; -- report error for EXECUTE canceled PARALLEL RETRIEVE CURSOR 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c6', -1); 1: ROLLBACK; -- check no endpoint info -2: SELECT state FROM gp_endpoints() WHERE cursorname='c6'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c6'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c6'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c6'; ---------- Test6.1: Cancel (using signal INT) the process of 'CHECK PARALLEL RETRIEVE CURSOR' without rollback 1: BEGIN; 1: DECLARE c61 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -1: @post_run 'parse_endpoint_info 61 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c61'; +1: @post_run 'parse_endpoint_info 61 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c61'; -- get backend pid of this session which run 'gp_wait_parallel_retrieve_cursor' 1: @post_run 'get_tuple_cell PID611 1 1 ; create_sub "${PID611}[ \t]*" QDPid611': select pg_backend_pid(); 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c61', -1); @@ -183,27 +183,27 @@ insert into t1 select generate_series(1,100); -- check it can cancel the "gp_wait_parallel_retrieve_cursor" 1<: -- check no endpoint info left -2: SELECT state FROM gp_endpoints() WHERE cursorname='c61'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c61'; +2: 
SELECT state FROM gp_get_endpoints() WHERE cursorname='c61'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c61'; -- quit the session of 'CHECK PARALLEL RETRIEVE CURSOR' and keep other session connected 1q: -- check no endpoint info -2: SELECT state FROM gp_endpoints() WHERE cursorname='c61'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c61'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c61'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c61'; 0Rq: 1Rq: ---------- Test7: terminate (using signal QUIT) the process of 'CHECK PARALLEL RETRIEVE CURSOR' 1: BEGIN; 1: DECLARE c7 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -1: @post_run 'parse_endpoint_info 7 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c7'; +1: @post_run 'parse_endpoint_info 7 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c7'; -- get backend pid of this session which run 'gp_wait_parallel_retrieve_cursor' 1: @post_run 'get_tuple_cell PID71 1 1 ; create_sub "${PID71}[ \t]*" QDPid71': select pg_backend_pid(); 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c7', -1); -- some endpoint retrieve partial results, some endpoint finished retrieving, some endpoint not start retrieving 0R: @pre_run 'set_endpoint_variable @ENDPOINT7': RETRIEVE 10 FROM ENDPOINT "@ENDPOINT7"; 1R: @pre_run 'set_endpoint_variable @ENDPOINT7': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT7"; -2U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c7'; +2U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c7'; -- run 'kill -s QUIT QDPid' to cancel the endpoint execution backend, retrieve session still can work -- here need to sleep sometime to wait for endpoint QE backend to 
detect QD connection lost. 0U: @pre_run 'kill -s QUIT ${PID71}&& sleep 5 && echo "${RAW_STR}" ': SELECT 1; @@ -214,28 +214,77 @@ insert into t1 select generate_series(1,100); 2q: -1Uq: -- check no endpoint info left -2: SELECT state FROM gp_endpoints() WHERE cursorname='c7'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c7'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c7'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c7'; -- report error for EXECUTE canceled PARALLEL RETRIEVE CURSOR 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c7', -1); 1: ROLLBACK; -- check no endpoint info -2: SELECT state FROM gp_endpoints() WHERE cursorname='c7'; -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c7'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c7'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c7'; ---------- Test8: Status visibilities for different sessions 1: BEGIN; 1: DECLARE c8 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -1: @post_run 'get_tuple_cell SESSION81 1 1 ; create_match_sub_with_spaces $SESSION81 session81': SELECT sessionid,state FROM gp_session_endpoints() WHERE cursorname='c8'; +1: @post_run 'get_tuple_cell SESSION81 1 1 ; create_match_sub_with_spaces $SESSION81 session81': SELECT sessionid,state FROM gp_get_session_endpoints() WHERE cursorname='c8'; -- Session 2 can only see its own cursors by default. 2: BEGIN; 2: DECLARE c8 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -2: @post_run 'get_tuple_cell SESSION82 1 1 ; create_match_sub_with_spaces $SESSION82 session82': SELECT sessionid,state FROM gp_session_endpoints() WHERE cursorname='c8'; --- Session 2 can see all cursors with gp_endpoints(). 
-2: SELECT sessionid,state FROM gp_endpoints() WHERE cursorname='c8' order by sessionid; +2: @post_run 'get_tuple_cell SESSION82 1 1 ; create_match_sub_with_spaces $SESSION82 session82': SELECT sessionid,state FROM gp_get_session_endpoints() WHERE cursorname='c8'; +-- Session 2 can see all cursors with gp_get_endpoints(). +2: SELECT sessionid,state FROM gp_get_endpoints() WHERE cursorname='c8' order by sessionid; 1: CLOSE c8; 1: END; 2: CLOSE c8; 2: END; +---------- Test9: Cancel (using pg_cancel_backend(pid)) the process of 'CHECK PARALLEL RETRIEVE CURSOR' +1: BEGIN; +1: DECLARE c9 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; +1: @post_run 'parse_endpoint_info 9 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c9'; + +1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c9', -1); +-- some endpoint retrieve partial results, some endpoint finished retrieving, some endpoint not start retrieving +0R: @pre_run 'set_endpoint_variable @ENDPOINT9': RETRIEVE 10 FROM ENDPOINT "@ENDPOINT9"; +1R: @pre_run 'set_endpoint_variable @ENDPOINT9': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT9"; +-- run pg_cancel_backend(pid) to cancel the endpoint execution backend, retrieve session still can work +2: select pg_cancel_backend(pid) from pg_stat_activity where query like 'SELECT * FROM gp_wait_parallel_retrieve_cursor(''c9'', -1);'; +-- check it can cancel the "gp_wait_parallel_retrieve_cursor" +1<: +-- check no endpoint info left +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c9'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c9'; +-- report error for EXECUTE canceled PARALLEL RETRIEVE CURSOR +1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c9', -1); +1: ROLLBACK; +-- check no endpoint info +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c9'; + +---------- Test10: terminate (using pg_terminate_backend(pid)) the process of 'CHECK PARALLEL RETRIEVE CURSOR' 
+1: BEGIN; +1: DECLARE c10 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; +1: @post_run 'parse_endpoint_info 10 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c10'; +1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c10', -1); +-- some endpoint retrieve partial results, some endpoint finished retrieving, some endpoint not start retrieving +0R: @pre_run 'set_endpoint_variable @ENDPOINT10': RETRIEVE 10 FROM ENDPOINT "@ENDPOINT10"; +1R: @pre_run 'set_endpoint_variable @ENDPOINT10': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT10"; +2U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c10'; +-- run ' pg_terminate_backend(pid)' to cancel the endpoint execution backend, retrieve session still can work +-- here need to sleep sometime to wait for endpoint QE backend to detect QD connection lost. +2: select pg_terminate_backend(pid) from pg_stat_activity where query like 'SELECT * FROM gp_wait_parallel_retrieve_cursor(''c10'', -1);'; +-- check it can cancel the "gp_wait_parallel_retrieve_cursor" +1<: +-- quit all sessions on the master, because connect lost +1q: +2q: +-1Uq: +-- check no endpoint info left +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c10'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c10'; +-- report error for EXECUTE canceled PARALLEL RETRIEVE CURSOR +1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c10', -1); +1: ROLLBACK; +-- check no endpoint info +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c10'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c10'; diff --git a/src/test/isolation2/output/parallel_retrieve_cursor/corner.source b/src/test/isolation2/output/parallel_retrieve_cursor/corner.source index 20e4f6d1f42..438a106dd9a 100644 --- a/src/test/isolation2/output/parallel_retrieve_cursor/corner.source +++ 
b/src/test/isolation2/output/parallel_retrieve_cursor/corner.source @@ -36,7 +36,7 @@ INSERT 10 BEGIN 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 1 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 1 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; endpoint_id1 | token_id | host_id | port_id | READY endpoint_id1 | token_id | host_id | port_id | READY endpoint_id1 | token_id | host_id | port_id | READY @@ -44,12 +44,12 @@ DECLARE 1: CLOSE c1; CLOSE -- check no endpoint info -1: SELECT auth_token,state FROM gp_endpoints() WHERE cursorname='c1'; +1: SELECT auth_token,state FROM gp_get_endpoints() WHERE cursorname='c1'; auth_token | state ------------+------- (0 rows) -- check no token info on QE after close PARALLEL RETRIEVE CURSOR -*U: @pre_run 'set_endpoint_variable @ENDPOINT1': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT1'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT1': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT1'; state ------- (0 rows) @@ -77,7 +77,7 @@ ROLLBACK BEGIN 1: DECLARE c11 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t11; DECLARE -1: @post_run 'parse_endpoint_info 11 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c11'; +1: @post_run 'parse_endpoint_info 11 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c11'; endpoint_id11 | token_id | host_id | port_id | READY endpoint_id11 | token_id | host_id | port_id | READY endpoint_id11 | token_id | host_id | port_id | READY @@ -85,12 +85,12 @@ DECLARE 1: CLOSE c11; CLOSE -- check no endpoint info -1: SELECT auth_token,state FROM gp_endpoints() WHERE cursorname='c11'; +1: SELECT auth_token,state FROM gp_get_endpoints() WHERE cursorname='c11'; 
auth_token | state ------------+------- (0 rows) -- check no token info on QE after close PARALLEL RETRIEVE CURSOR -*U: @pre_run 'set_endpoint_variable @ENDPOINT11': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT11'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT11': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT11'; state ------- (0 rows) @@ -141,7 +141,7 @@ DECLARE 1: ROLLBACK; ROLLBACK -- check no endpoint info -1: SELECT auth_token,state FROM gp_endpoints(); +1: SELECT auth_token,state FROM gp_get_endpoints(); auth_token | state ------------+------- (0 rows) @@ -164,7 +164,7 @@ ERROR: cursor "c2" does not exist 1: ROLLBACK; ROLLBACK -- check no endpoint info -1: SELECT auth_token,state FROM gp_endpoints(); +1: SELECT auth_token,state FROM gp_get_endpoints(); auth_token | state ------------+------- (0 rows) @@ -194,7 +194,7 @@ DECLARE DECLARE 1: DECLARE c11 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c2'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c2'; endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY @@ -207,7 +207,7 @@ DECLARE (1 row) 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; state ------- (0 rows) @@ -353,7 +353,7 @@ DECLARE t (1 row) -- check all endpoint state -1: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; state 
---------- FINISHED @@ -363,7 +363,7 @@ DECLARE 1: ROLLBACK; ROLLBACK -- check no endpoint info -1: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; state ------- (0 rows) @@ -381,7 +381,7 @@ DECLARE 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; ERROR: cursor "c1" already exists -- check no endpoint info -1: SELECT auth_token,state FROM gp_endpoints(); +1: SELECT auth_token,state FROM gp_get_endpoints(); ERROR: current transaction is aborted, commands ignored until end of transaction block 1: ROLLBACK; ROLLBACK @@ -392,14 +392,14 @@ BEGIN DECLARE 1: DECLARE c2 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c2'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c2'; endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY (3 rows) 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; state ------- (0 rows) @@ -540,7 +540,7 @@ DECLARE t (1 row) -- check no endpoint info -1: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; state ---------- FINISHED @@ -560,11 +560,11 @@ ROLLBACK BEGIN 1: DECLARE c2 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1 ORDER BY a LIMIT 10; DECLARE -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c2'; +1: @post_run 
'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c2'; endpoint_id2 | token_id | host_id | port_id | READY (1 row) -*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; state ------- READY @@ -617,7 +617,7 @@ DECLARE t (1 row) -- check no endpoint info -1: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; state ---------- FINISHED @@ -635,7 +635,7 @@ Sessions not started cannot be quit BEGIN 1: DECLARE c2 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1 ORDER BY a LIMIT 0; DECLARE -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c2'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c2'; endpoint_id2 | token_id | host_id | port_id | READY (1 row) 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', 0); @@ -645,7 +645,7 @@ DECLARE (1 row) 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; state ------- READY @@ -687,7 +687,7 @@ DECLARE t (1 row) -- check no endpoint info -1: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; state ---------- FINISHED @@ -705,7 +705,7 @@ Sessions not started cannot be quit BEGIN 1: DECLARE c2 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t2; DECLARE -1: @post_run 'parse_endpoint_info 2 1 2 3 4': 
SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c2'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c2'; endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY @@ -717,7 +717,7 @@ DECLARE (1 row) 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; state ------- (0 rows) @@ -763,7 +763,7 @@ DECLARE t (1 row) -- check no endpoint info -1: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; state ---------- FINISHED @@ -783,7 +783,7 @@ ROLLBACK BEGIN 1: DECLARE c2 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t3; DECLARE -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c2'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c2'; endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY @@ -795,7 +795,7 @@ DECLARE (1 row) 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; state ------- (0 rows) @@ -851,7 +851,7 @@ DECLARE t (1 row) -- check no endpoint info -1: SELECT state FROM 
gp_endpoints() WHERE cursorname='c2'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; state ---------- FINISHED @@ -871,7 +871,7 @@ ROLLBACK BEGIN 1: DECLARE c2 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t4; DECLARE -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c2'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c2'; endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY @@ -883,7 +883,7 @@ DECLARE (1 row) 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; state ------- (0 rows) @@ -929,7 +929,7 @@ DECLARE t (1 row) -- check no endpoint info -1: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; state ---------- FINISHED @@ -949,7 +949,7 @@ ROLLBACK BEGIN 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1 WHERE a = 50; DECLARE -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; endpoint_id2 | token_id | host_id | port_id | READY (1 row) 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', 0); @@ -959,7 +959,7 @@ DECLARE (1 row) 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); -1U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE 
endpointname='@ENDPOINT2'; +1U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; state ------- READY @@ -981,7 +981,7 @@ DECLARE t (1 row) -- check no endpoint info -1: SELECT state FROM gp_endpoints() WHERE cursorname='c1'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c1'; state ---------- FINISHED @@ -996,7 +996,7 @@ ROLLBACK BEGIN 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT SUM(a) FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; endpoint_id2 | token_id | host_id | port_id | READY (1 row) 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', 0); @@ -1006,7 +1006,7 @@ DECLARE (1 row) 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); --1U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +-1U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; state ------- READY @@ -1027,7 +1027,7 @@ DECLARE ---------- t (1 row) -1: SELECT state FROM gp_endpoints() WHERE cursorname='c1'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c1'; state ---------- FINISHED @@ -1042,11 +1042,11 @@ ROLLBACK BEGIN 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT AVG(a) FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; endpoint_id2 | token_id | host_id | port_id | READY (1 row) --1U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT 
state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +-1U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; state ------- READY @@ -1067,7 +1067,7 @@ DECLARE ---------- t (1 row) -1: SELECT state FROM gp_endpoints() WHERE cursorname='c1'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c1'; state ---------- FINISHED @@ -1082,12 +1082,12 @@ ROLLBACK BEGIN 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT COUNT(*) FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; endpoint_id2 | token_id | host_id | port_id | READY (1 row) 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); --1U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +-1U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; state ------- READY @@ -1103,7 +1103,7 @@ DECLARE ---------- t (1 row) -1: SELECT state FROM gp_endpoints() WHERE cursorname='c1'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c1'; state ---------- FINISHED @@ -1118,7 +1118,7 @@ ROLLBACK BEGIN 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1, t5 where t1.a = t5.b; DECLARE -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY @@ -1156,7 
+1156,7 @@ DECLARE ---------- t (1 row) -1: SELECT state FROM gp_endpoints() WHERE cursorname='c1'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c1'; state ---------- FINISHED @@ -1176,7 +1176,7 @@ ROLLBACK BEGIN 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT COUNT(*) FROM t1, t5 where t1.a = t5.b; DECLARE -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; endpoint_id2 | token_id | host_id | port_id | READY (1 row) 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); @@ -1192,7 +1192,7 @@ DECLARE ---------- t (1 row) -1: SELECT state FROM gp_endpoints() WHERE cursorname='c1'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c1'; state ---------- FINISHED @@ -1207,14 +1207,14 @@ ROLLBACK BEGIN 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * from t1; DECLARE -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY (3 rows) 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; state ------- (0 rows) @@ -1358,14 +1358,14 @@ DECLARE CLOSE 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * from t1; DECLARE -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT 
endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY (3 rows) 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; state ------- (0 rows) @@ -1505,7 +1505,7 @@ DECLARE ---------- t (1 row) -1: SELECT state FROM gp_endpoints() WHERE cursorname='c1'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c1'; state ---------- FINISHED @@ -1525,14 +1525,14 @@ ROLLBACK BEGIN 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * from t1; DECLARE -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY (3 rows) 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; state ------- (0 rows) @@ -1680,14 +1680,14 @@ DECLARE CLOSE 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * from t1; DECLARE -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT 
endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY (3 rows) 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; state ------- (0 rows) @@ -1827,7 +1827,7 @@ DECLARE ---------- t (1 row) -1: SELECT state FROM gp_endpoints() WHERE cursorname='c1'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c1'; state ---------- FINISHED @@ -1849,14 +1849,14 @@ BEGIN SAVEPOINT 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * from t1; DECLARE -1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY (3 rows) 1: ROLLBACK TO s1; ROLLBACK -1: SELECT state FROM gp_endpoints() WHERE cursorname='c1'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c1'; state ------- (0 rows) @@ -1888,7 +1888,7 @@ COMMIT BEGIN 1: DECLARE c21a PARALLEL RETRIEVE CURSOR FOR SELECT COUNT(*) from t1; DECLARE -1: @post_run 'get_tuple_cell TOKEN21a 1 1 ; create_match_sub $TOKEN21a token21a' : SELECT auth_token FROM gp_endpoints() WHERE cursorname='c21a'; +1: @post_run 'get_tuple_cell TOKEN21a 1 1 ; create_match_sub 
$TOKEN21a token21a' : SELECT auth_token FROM gp_get_endpoints() WHERE cursorname='c21a'; auth_token ---------------------------------- token21a @@ -1898,7 +1898,7 @@ DECLARE DECLARE 1: DECLARE c21c PARALLEL RETRIEVE CURSOR FOR SELECT * from t1; DECLARE -1: SELECT auth_token FROM gp_endpoints() WHERE cursorname='c21a'; +1: SELECT auth_token FROM gp_get_endpoints() WHERE cursorname='c21a'; auth_token ---------------------------------- token21a @@ -1916,7 +1916,7 @@ Sessions not started cannot be quit BEGIN 1: DECLARE c22 PARALLEL RETRIEVE CURSOR WITHOUT HOLD FOR SELECT * FROM generate_series(1,10); DECLARE -1: @post_run 'parse_endpoint_info 22 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c22'; +1: @post_run 'parse_endpoint_info 22 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c22'; endpoint_id22 | token_id | host_id | port_id | READY (1 row) 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c22', 0); @@ -1925,7 +1925,7 @@ DECLARE f (1 row) -*U: @pre_run 'set_endpoint_variable @ENDPOINT22': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT22'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT22': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT22'; state ------- READY @@ -1973,7 +1973,7 @@ DECLARE t (1 row) -- check no endpoint info -1: SELECT state FROM gp_endpoints() WHERE cursorname='c22'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c22'; state ---------- FINISHED @@ -1993,7 +1993,7 @@ Sessions not started cannot be quit BEGIN 1: DECLARE c23 PARALLEL RETRIEVE CURSOR WITHOUT HOLD FOR SELECT relname FROM pg_class where relname='pg_class'; DECLARE -1: @post_run 'parse_endpoint_info 23 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c23'; +1: @post_run 'parse_endpoint_info 23 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM 
gp_get_endpoints() WHERE cursorname='c23'; endpoint_id23 | token_id | host_id | port_id | READY (1 row) 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c23', 0); @@ -2002,7 +2002,7 @@ DECLARE f (1 row) -*U: @pre_run 'set_endpoint_variable @ENDPOINT23': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT23'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT23': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT23'; state ------- READY @@ -2041,7 +2041,7 @@ DECLARE t (1 row) -- check no endpoint info -1: SELECT state FROM gp_endpoints() WHERE cursorname='c23'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c23'; state ---------- FINISHED @@ -2063,17 +2063,17 @@ DECLARE DECLARE 1: DECLARE "x1234567890123456789012345678901234567890123456789012345678901x" PARALLEL RETRIEVE CURSOR WITHOUT HOLD FOR SELECT * FROM t5; DECLARE -1: @post_run 'parse_endpoint_info 24 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='x12345678901234567890123456789012345678901234567890123456789x'; +1: @post_run 'parse_endpoint_info 24 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='x12345678901234567890123456789012345678901234567890123456789x'; endpoint_id24 | token_id | host_id | port_id | READY endpoint_id24 | token_id | host_id | port_id | READY endpoint_id24 | token_id | host_id | port_id | READY (3 rows) -1: @post_run 'parse_endpoint_info 24_1 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='x12345678901234567890123456789012345678901234567890123456789012'; +1: @post_run 'parse_endpoint_info 24_1 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='x12345678901234567890123456789012345678901234567890123456789012'; endpoint_id24_1 | token_id | host_id | port_id | READY endpoint_id24_1 | token_id | host_id | port_id | READY endpoint_id24_1 | 
token_id | host_id | port_id | READY (3 rows) -1: @post_run 'parse_endpoint_info 24_2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='x1234567890123456789012345678901234567890123456789012345678901x'; +1: @post_run 'parse_endpoint_info 24_2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='x1234567890123456789012345678901234567890123456789012345678901x'; endpoint_id24_2 | token_id | host_id | port_id | READY endpoint_id24_2 | token_id | host_id | port_id | READY endpoint_id24_2 | token_id | host_id | port_id | READY @@ -2176,7 +2176,7 @@ ROLLBACK BEGIN 1: DECLARE c2 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t5; DECLARE -1: @post_run 'parse_endpoint_info 26 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c2'; +1: @post_run 'parse_endpoint_info 26 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c2'; endpoint_id26 | token_id | host_id | port_id | READY endpoint_id26 | token_id | host_id | port_id | READY endpoint_id26 | token_id | host_id | port_id | READY @@ -2210,7 +2210,7 @@ COMMIT BEGIN 1: DECLARE c27 PARALLEL RETRIEVE CURSOR FOR SELECT generate_series(1,10); DECLARE -1: @post_run 'parse_endpoint_info 27 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c27'; +1: @post_run 'parse_endpoint_info 27 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c27'; endpoint_id27 | token_id | host_id | port_id | READY (1 row) 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c27', 0); @@ -2219,7 +2219,7 @@ DECLARE f (1 row) -*U: @pre_run 'set_endpoint_variable @ENDPOINT27': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT27'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT27': SELECT state FROM gp_get_segment_endpoints() WHERE 
endpointname='@ENDPOINT27'; state ------- READY @@ -2267,7 +2267,7 @@ DECLARE t (1 row) -- check no endpoint info -1: SELECT state FROM gp_endpoints() WHERE cursorname='c27'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c27'; state ---------- FINISHED diff --git a/src/test/isolation2/output/parallel_retrieve_cursor/fault_inject.source b/src/test/isolation2/output/parallel_retrieve_cursor/fault_inject.source index 044e288796b..4539da90f54 100644 --- a/src/test/isolation2/output/parallel_retrieve_cursor/fault_inject.source +++ b/src/test/isolation2/output/parallel_retrieve_cursor/fault_inject.source @@ -23,14 +23,14 @@ INSERT 100 BEGIN -- should report error on seg0 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -ERROR: failed to allocate endpoint +ERROR: failed to allocate endpoint for session id xxx 1: ROLLBACK; ROLLBACK -- test same error on another session 3: BEGIN; BEGIN 3: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; -ERROR: failed to allocate endpoint +ERROR: failed to allocate endpoint for session id xxx 3: ROLLBACK; ROLLBACK -- reset the fault injection @@ -49,14 +49,14 @@ ROLLBACK BEGIN 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 1 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 1 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; endpoint_id1 | token_id | host_id | port_id | READY endpoint_id1 | token_id | host_id | port_id | READY endpoint_id1 | token_id | host_id | port_id | READY (3 rows) 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); -*U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1' or endpointname='DUMMYENDPOINTNAME'; +*U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1' or endpointname='DUMMYENDPOINTNAME'; state ------- (0 rows) @@ -199,26 +199,26 @@ 
DECLARE 1: CLOSE c1; CLOSE -- check no endpoint info -1: SELECT auth_token,state FROM gp_endpoints() WHERE cursorname='c1' or endpointname='DUMMYENDPOINTNAME'; +1: SELECT auth_token,state FROM gp_get_endpoints() WHERE cursorname='c1' or endpointname='DUMMYENDPOINTNAME'; auth_token | state ------------+------- (0 rows) -- check no token info on QE after close PARALLEL RETRIEVE CURSOR -*U: SELECT * FROM gp_segment_endpoints() WHERE cursorname='c1' or endpointname='DUMMYENDPOINTNAME'; - auth_token | databaseid | senderpid | receiverpid | state | dbid | sessionid | userid | endpointname | cursorname -------------+------------+-----------+-------------+-------+------+-----------+--------+--------------+------------ +*U: SELECT * FROM gp_get_segment_endpoints() WHERE cursorname='c1' or endpointname='DUMMYENDPOINTNAME'; + auth_token | databaseid | senderpid | receiverpid | state | gp_segment_id | sessionid | username | endpointname | cursorname +------------+------------+-----------+-------------+-------+---------------+-----------+----------+--------------+------------ (0 rows) - auth_token | databaseid | senderpid | receiverpid | state | dbid | sessionid | userid | endpointname | cursorname -------------+------------+-----------+-------------+-------+------+-----------+--------+--------------+------------ + auth_token | databaseid | senderpid | receiverpid | state | gp_segment_id | sessionid | username | endpointname | cursorname +------------+------------+-----------+-------------+-------+---------------+-----------+----------+--------------+------------ (0 rows) - auth_token | databaseid | senderpid | receiverpid | state | dbid | sessionid | userid | endpointname | cursorname -------------+------------+-----------+-------------+-------+------+-----------+--------+--------------+------------ + auth_token | databaseid | senderpid | receiverpid | state | gp_segment_id | sessionid | username | endpointname | cursorname 
+------------+------------+-----------+-------------+-------+---------------+-----------+----------+--------------+------------ (0 rows) - auth_token | databaseid | senderpid | receiverpid | state | dbid | sessionid | userid | endpointname | cursorname -------------+------------+-----------+-------------+-------+------+-----------+--------+--------------+------------ + auth_token | databaseid | senderpid | receiverpid | state | gp_segment_id | sessionid | username | endpointname | cursorname +------------+------------+-----------+-------------+-------+---------------+-----------+----------+--------------+------------ (0 rows) 1: ROLLBACK; @@ -240,14 +240,14 @@ ROLLBACK BEGIN 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * from t1; DECLARE -1: @post_run 'parse_endpoint_info 2 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY (3 rows) 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); -0U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +0U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; state ------- READY @@ -258,25 +258,25 @@ ERROR: canceling statement due to user request 1<: <... 
completed> ERROR: canceling MPP operation: "Endpoint retrieve statement aborted" -1U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +1U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; state ------- (0 rows) 1R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT2"; -ERROR: the endpoint endpoint_id2 does not exist in the session +ERROR: the endpoint endpoint_id2 does not exist for session id xxx -2U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +2U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; state ------- (0 rows) 2R: @pre_run 'set_endpoint_variable @ENDPOINT2': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT2"; -ERROR: the endpoint endpoint_id2 does not exist in the session +ERROR: the endpoint endpoint_id2 does not exist for session id xxx 1<: <... completed> FAILED: Execution failed 1: ROLLBACK; ROLLBACK -1: SELECT auth_token,state FROM gp_endpoints() WHERE cursorname='c1'; +1: SELECT auth_token,state FROM gp_get_endpoints() WHERE cursorname='c1'; auth_token | state ------------+------- (0 rows) @@ -302,14 +302,14 @@ ROLLBACK BEGIN 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * from t1; DECLARE -1: @post_run 'parse_endpoint_info 3 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 3 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; endpoint_id3 | token_id | host_id | port_id | READY endpoint_id3 | token_id | host_id | port_id | READY endpoint_id3 | token_id | host_id | port_id | READY (3 rows) 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); -1U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +1U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; state ------- READY @@ -320,25 +320,25 @@ ERROR: canceling statement due to user request 1<: <... 
completed> ERROR: canceling MPP operation: "Endpoint retrieve statement aborted" -0U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +0U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; state ------- (0 rows) 0R: @pre_run 'set_endpoint_variable @ENDPOINT3': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT3"; -ERROR: the endpoint endpoint_id3 does not exist in the session +ERROR: the endpoint endpoint_id3 does not exist for session id xxx -2U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +2U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; state ------- (0 rows) 2R: @pre_run 'set_endpoint_variable @ENDPOINT3': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT3"; -ERROR: the endpoint endpoint_id3 does not exist in the session +ERROR: the endpoint endpoint_id3 does not exist for session id xxx 1<: <... completed> FAILED: Execution failed 1: ROLLBACK; ROLLBACK -1: SELECT auth_token,state FROM gp_endpoints() WHERE cursorname='c1'; +1: SELECT auth_token,state FROM gp_get_endpoints() WHERE cursorname='c1'; auth_token | state ------------+------- (0 rows) @@ -359,14 +359,14 @@ ROLLBACK BEGIN 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * from t1; DECLARE -1: @post_run 'parse_endpoint_info 4 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 4 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; endpoint_id4 | token_id | host_id | port_id | READY endpoint_id4 | token_id | host_id | port_id | READY endpoint_id4 | token_id | host_id | port_id | READY (3 rows) 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); -1U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +1U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; state ------- READY @@ -413,7 +413,7 @@ DECLARE 98 (37 rows) -0U: SELECT state FROM gp_segment_endpoints() WHERE 
cursorname='c1'; +0U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; state ------- READY @@ -424,12 +424,12 @@ ERROR: fault triggered, fault name:'fetch_tuples_from_endpoint' fault type:'err 1<: <... completed> ERROR: canceling MPP operation: "Endpoint retrieve statement aborted" (seg0 127.0.0.1:25432 pid=31406) -2U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +2U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; state ------- (0 rows) 2R: @pre_run 'set_endpoint_variable @ENDPOINT4': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT4"; -ERROR: the endpoint endpoint_id4 does not exist in the session +ERROR: the endpoint endpoint_id4 does not exist for session id xxx 1<: <... completed> FAILED: Execution failed @@ -477,28 +477,28 @@ ROLLBACK BEGIN 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * from t1; DECLARE -1: @post_run 'parse_endpoint_info 5 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 5 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; endpoint_id5 | token_id | host_id | port_id | READY endpoint_id5 | token_id | host_id | port_id | READY endpoint_id5 | token_id | host_id | port_id | READY (3 rows) 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); -0U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +0U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; state ------- READY (1 row) 0R&: @pre_run 'set_endpoint_variable @ENDPOINT5': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT5"; -2U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +2U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; state ------- READY (1 row) 2R&: @pre_run 'set_endpoint_variable @ENDPOINT5': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT5"; -1U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +1U: SELECT state 
FROM gp_get_segment_endpoints() WHERE cursorname='c1'; state ------- READY @@ -577,27 +577,27 @@ SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'suspend', '', '', '', 5, 5 BEGIN 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * from t1; DECLARE -1: @post_run 'parse_endpoint_info 7 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 7 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; endpoint_id7 | token_id | host_id | port_id | READY endpoint_id7 | token_id | host_id | port_id | READY endpoint_id7 | token_id | host_id | port_id | READY (3 rows) -0U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +0U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; state ------- READY (1 row) 0R&: @pre_run 'set_endpoint_variable @ENDPOINT7': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT7"; -1U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +1U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; state ------- READY (1 row) 1R&: @pre_run 'set_endpoint_variable @ENDPOINT7': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT7"; -2U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +2U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; state ------- READY @@ -701,28 +701,28 @@ SELECT gp_inject_fault('fetch_tuples_from_endpoint', 'suspend', '', '', '', 800, BEGIN 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * from t2; DECLARE -1: @post_run 'parse_endpoint_info 6 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 6 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; endpoint_id6 | token_id | host_id | port_id | READY endpoint_id6 | token_id | host_id | port_id | READY endpoint_id6 | token_id | host_id | 
port_id | READY (3 rows) 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); -0U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +0U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; state ------- READY (1 row) 0R&: @pre_run 'set_endpoint_variable @ENDPOINT6': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT6"; -2U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +2U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; state ------- READY (1 row) 2R&: @pre_run 'set_endpoint_variable @ENDPOINT6': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT6"; -1U: SELECT state FROM gp_segment_endpoints() WHERE cursorname='c1'; +1U: SELECT state FROM gp_get_segment_endpoints() WHERE cursorname='c1'; state ------- READY diff --git a/src/test/isolation2/output/parallel_retrieve_cursor/privilege.source b/src/test/isolation2/output/parallel_retrieve_cursor/privilege.source index 20bd6837e80..ebb3dd96f67 100644 --- a/src/test/isolation2/output/parallel_retrieve_cursor/privilege.source +++ b/src/test/isolation2/output/parallel_retrieve_cursor/privilege.source @@ -48,7 +48,7 @@ SET BEGIN 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 1 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 1 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; endpoint_id1 | token_id | host_id | port_id | READY endpoint_id1 | token_id | host_id | port_id | READY endpoint_id1 | token_id | host_id | port_id | READY @@ -67,11 +67,11 @@ DECLARE 1: DECLARE c12 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM generate_series(1,10); DECLARE --- u1 is able to see all endpoints created by himself. 
-1: SELECT DISTINCT(cursorname), usename FROM gp_endpoints() AS e, pg_user AS u where e.userid = u.usesysid; - cursorname | usename -------------+--------- - c12 | u1 - c2 | u1 +1: SELECT DISTINCT(cursorname), username FROM gp_get_endpoints(); + cursorname | username +------------+---------- + c12 | u1 + c2 | u1 (2 rows) --- adminuser should be able to see all the endpoints declared by u1 with state READY @@ -82,16 +82,16 @@ SET --------------+-------------- adminuser | adminuser (1 row) -2: @post_run 'parse_endpoint_info 2 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c2'; +2: @post_run 'parse_endpoint_info 2 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c2'; endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY (3 rows) -2: @post_run 'parse_endpoint_info 12 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c12'; +2: @post_run 'parse_endpoint_info 12 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c12'; endpoint_id12 | token_id | host_id | port_id | READY (1 row) -2: SELECT DISTINCT(cursorname), usename FROM gp_endpoints() AS e, pg_user AS u where e.userid = u.usesysid; - cursorname | usename +2: SELECT DISTINCT(cursorname), username FROM gp_get_endpoints(); + cursorname | username ------------+----------- c1 | adminuser c12 | u1 @@ -99,8 +99,8 @@ SET (3 rows) --- adminuser should be able to see the cursor state change to READY -2: SELECT auth_token, usename, state FROM gp_endpoints() endpoints, pg_user WHERE endpoints.userid = pg_user.usesysid order by usename; - auth_token | usename | state +2: SELECT auth_token, username, state FROM gp_get_endpoints() endpoints order by username; + auth_token | username | state 
----------------------------------+-----------+------- token_id | adminuser | READY token_id | adminuser | READY @@ -119,8 +119,8 @@ SET (1 row) 0R: SELECT SESSION_USER, CURRENT_USER; ERROR: This is a retrieve connection, but the query is not a RETRIEVE. -0U: SELECT auth_token, usename FROM gp_segment_endpoints() AS e, pg_user AS u where e.userid = u.usesysid; - auth_token | usename +0U: SELECT auth_token, username FROM gp_get_segment_endpoints(); + auth_token | username ----------------------------------+----------- token_id | adminuser token_id | u1 @@ -162,18 +162,18 @@ SET (1 row) 1: DECLARE c3 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -2: @post_run 'parse_endpoint_info 3 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c3'; +2: @post_run 'parse_endpoint_info 3 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c3'; endpoint_id3 | token_id | host_id | port_id | READY endpoint_id3 | token_id | host_id | port_id | READY endpoint_id3 | token_id | host_id | port_id | READY (3 rows) --- uu1 can not see u1's endpoints. 
-1: SELECT DISTINCT(cursorname), usename FROM gp_endpoints() AS e, pg_user AS u where e.userid = u.usesysid; - cursorname | usename -------------+--------- +1: SELECT DISTINCT(cursorname), username FROM gp_get_endpoints(); + cursorname | username +------------+---------- (0 rows) -2: SELECT DISTINCT(cursorname), usename FROM gp_endpoints() AS e, pg_user AS u where e.userid = u.usesysid; - cursorname | usename +2: SELECT DISTINCT(cursorname), username FROM gp_get_endpoints(); + cursorname | username ------------+----------- c1 | adminuser c12 | u1 @@ -233,13 +233,13 @@ SET DECLARE 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); -2: @post_run 'parse_endpoint_info 40 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c0'; +2: @post_run 'parse_endpoint_info 40 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c0'; endpoint_id40 | token_id | host_id | port_id | READY endpoint_id40 | token_id | host_id | port_id | READY endpoint_id40 | token_id | host_id | port_id | READY (3 rows) -2: @post_run 'parse_endpoint_info 4 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +2: @post_run 'parse_endpoint_info 4 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; endpoint_id4 | token_id | host_id | port_id | READY endpoint_id4 | token_id | host_id | port_id | READY endpoint_id4 | token_id | host_id | port_id | READY @@ -274,13 +274,13 @@ ERROR: This is a retrieve connection, but the query is not a RETRIEVE. ERROR: the PARALLEL RETRIEVE CURSOR was created by a different user -HINT: Using the same user as the PARALLEL RETRIEVE CURSOR creator to retrieve. +HINT: Use the same user as the PARALLEL RETRIEVE CURSOR creator to retrieve. 
ERROR: the PARALLEL RETRIEVE CURSOR was created by a different user -HINT: Using the same user as the PARALLEL RETRIEVE CURSOR creator to retrieve. +HINT: Use the same user as the PARALLEL RETRIEVE CURSOR creator to retrieve. ERROR: the PARALLEL RETRIEVE CURSOR was created by a different user -HINT: Using the same user as the PARALLEL RETRIEVE CURSOR creator to retrieve. +HINT: Use the same user as the PARALLEL RETRIEVE CURSOR creator to retrieve. -- cancel the no privilege retrieving endpoints, otherwise it will wait until statement_timeout 42: select pg_cancel_backend(pid) from pg_stat_activity where query like 'SELECT * FROM gp_wait_parallel_retrieve_cursor(''c1'', -1);'; pg_cancel_backend @@ -331,7 +331,7 @@ BEGIN -- Used to let super login to retrieve session so then it can change user in session. 1: DECLARE c0 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 50 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c0'; +1: @post_run 'parse_endpoint_info 50 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c0'; endpoint_id50 | token_id | host_id | port_id | READY endpoint_id50 | token_id | host_id | port_id | READY endpoint_id50 | token_id | host_id | port_id | READY @@ -341,7 +341,7 @@ SET --- c4 is declared and executed by u1 1: DECLARE c4 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 5 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c4'; +1: @post_run 'parse_endpoint_info 5 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c4'; endpoint_id5 | token_id | host_id | port_id | READY endpoint_id5 | token_id | host_id | port_id | READY endpoint_id5 | token_id | host_id | port_id | READY @@ -350,9 +350,9 @@ DECLARE --- u2 is not able to see u1's endpoints on master 1: SET 
SESSION AUTHORIZATION u2; SET -1: SELECT * from gp_endpoints(); - dbid | auth_token | cursorname | sessionid | hostname | port | userid | state | endpointname -------+------------+------------+-----------+----------+------+--------+-------+-------------- +1: SELECT * from gp_get_endpoints(); + gp_segment_id | auth_token | cursorname | sessionid | hostname | port | username | state | endpointname +---------------+------------+------------+-----------+----------+------+----------+-------+-------------- (0 rows) --- execute the cursor by u1 @@ -370,24 +370,24 @@ ERROR: This is a retrieve connection, but the query is not a RETRIEVE. ERROR: This is a retrieve connection, but the query is not a RETRIEVE. ERROR: This is a retrieve connection, but the query is not a RETRIEVE. -*U: SELECT auth_token, usename FROM gp_segment_endpoints() AS e, pg_user AS u where e.userid = u.usesysid; - auth_token | usename -------------+--------- +*U: SELECT auth_token, username FROM gp_get_segment_endpoints(); + auth_token | username +------------+---------- (0 rows) - auth_token | usename + auth_token | username ----------------------------------+----------- token_id | adminuser token_id | u1 (2 rows) - auth_token | usename + auth_token | username ----------------------------------+----------- token_id | adminuser token_id | u1 (2 rows) - auth_token | usename + auth_token | username ----------------------------------+----------- token_id | adminuser token_id | u1 @@ -399,13 +399,13 @@ ERROR: This is a retrieve connection, but the query is not a RETRIEVE. ERROR: the PARALLEL RETRIEVE CURSOR was created by a different user -HINT: Using the same user as the PARALLEL RETRIEVE CURSOR creator to retrieve. +HINT: Use the same user as the PARALLEL RETRIEVE CURSOR creator to retrieve. ERROR: the PARALLEL RETRIEVE CURSOR was created by a different user -HINT: Using the same user as the PARALLEL RETRIEVE CURSOR creator to retrieve. 
+HINT: Use the same user as the PARALLEL RETRIEVE CURSOR creator to retrieve. ERROR: the PARALLEL RETRIEVE CURSOR was created by a different user -HINT: Using the same user as the PARALLEL RETRIEVE CURSOR creator to retrieve. +HINT: Use the same user as the PARALLEL RETRIEVE CURSOR creator to retrieve. -- cancel the no privilege retrieving endpoints, otherwise it will wait until statement_timeout 42: select pg_cancel_backend(pid) from pg_stat_activity where query like 'SELECT * FROM gp_wait_parallel_retrieve_cursor(''c4'', -1);'; pg_cancel_backend diff --git a/src/test/isolation2/output/parallel_retrieve_cursor/replicated_table.source b/src/test/isolation2/output/parallel_retrieve_cursor/replicated_table.source index b1623895081..75aa878a48a 100644 --- a/src/test/isolation2/output/parallel_retrieve_cursor/replicated_table.source +++ b/src/test/isolation2/output/parallel_retrieve_cursor/replicated_table.source @@ -10,18 +10,19 @@ INSERT 100 --------- Test1: Basic test for PARALLEL RETRIEVE CURSOR on replicated table -- Replicated table will execute on seg id: session_id % segment_number --- Declare a cursor and check gp_endpoints(), we can find out the real +-- Declare a cursor and check gp_get_endpoints(), we can find out the real -- segment id by joining gp_segment_configuration. This should equal to -- session_id % 3 (size of demo cluster). 1: BEGIN; BEGIN 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM rt1; DECLARE -1: SELECT sc.content = current_setting('gp_session_id')::int % 3 AS diff FROM gp_endpoints() ep, gp_segment_configuration sc WHERE ep.dbid = sc.dbid; +1: SELECT sc.content = current_setting('gp_session_id')::int % 3 AS diff FROM gp_get_endpoints() ep, gp_segment_configuration sc WHERE ep.gp_segment_id = sc.content; diff ------ t -(1 row) + t +(2 rows) 1: ROLLBACK; ROLLBACK 1q: ... @@ -56,7 +57,7 @@ DECLARE -- Just declare & CHECK PARALLEL RETRIEVE CURSORs in all segment_number (i.e. 
3) sessions, -- so that there should have specific session: MOD(sessionid,3)=1; -- Get token only in specific session id and retrieve this token. -7: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE MOD(sessionid,3)=1 LIMIT 1; +7: @post_run 'parse_endpoint_info 2 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE MOD(sessionid,3)=1 LIMIT 1; endpoint_id2 | token_id | host_id | port_id | READY (1 row) 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); @@ -65,7 +66,7 @@ DECLARE 4&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c4', -1); 5&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c5', -1); 6&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c6', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT2'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT2': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT2'; state ------- (0 rows) @@ -106,7 +107,7 @@ DECLARE #2retrieve> connection to server at "host_id", port port_id failed: FATAL: retrieve auth token is invalid -- cancel all 6 sessions -7: select pg_cancel_backend(pid) from pg_stat_activity, gp_endpoints() where sess_id = sessionid AND (cursorname ='c1' or cursorname='c2' or cursorname='c3' or cursorname ='c4' or cursorname='c5' or cursorname='c6'); +7: select pg_cancel_backend(pid) from pg_stat_activity, gp_get_endpoints() where sess_id = sessionid AND (cursorname ='c1' or cursorname='c2' or cursorname='c3' or cursorname ='c4' or cursorname='c5' or cursorname='c6'); pg_cancel_backend ------------------- t @@ -186,7 +187,7 @@ DECLARE -- Just declare & CHECK PARALLEL RETRIEVE CURSORs in all segment_number (i.e. 3) sessions, -- so that there should have specific session: MOD(sessionid,3)=1; -- Get token only in specific session id and retrieve this token. 
-7: @post_run 'parse_endpoint_info 3 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE MOD(sessionid,3)=1 LIMIT 1; +7: @post_run 'parse_endpoint_info 3 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE MOD(sessionid,3)=1 LIMIT 1; endpoint_id3 | token_id | host_id | port_id | READY (1 row) 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); @@ -195,7 +196,7 @@ DECLARE 4&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c4', -1); 5&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c5', -1); 6&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c6', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT3': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT3'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT3': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT3'; state ------- (0 rows) @@ -236,7 +237,7 @@ DECLARE #2retrieve> connection to server at "host_id", port port_id failed: FATAL: retrieve auth token is invalid -- cancel all 6 sessions -7: select pg_cancel_backend(pid) from pg_stat_activity, gp_endpoints() where sess_id = sessionid AND (cursorname ='c1' or cursorname='c2' or cursorname='c3' or cursorname ='c4' or cursorname='c5' or cursorname='c6'); +7: select pg_cancel_backend(pid) from pg_stat_activity, gp_get_endpoints() where sess_id = sessionid AND (cursorname ='c1' or cursorname='c2' or cursorname='c3' or cursorname ='c4' or cursorname='c5' or cursorname='c6'); pg_cancel_backend ------------------- t @@ -316,7 +317,7 @@ DECLARE -- Just declare & CHECK PARALLEL RETRIEVE CURSORs in all segment_number (i.e. 3) sessions, -- so that there should have specific session: MOD(sessionid,3)=1; -- Get token only in specific session id and retrieve this token. 
-7: @post_run 'parse_endpoint_info 4 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE MOD(sessionid,3)=1 LIMIT 1; +7: @post_run 'parse_endpoint_info 4 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE MOD(sessionid,3)=1 LIMIT 1; endpoint_id4 | token_id | host_id | port_id | READY (1 row) 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); @@ -325,7 +326,7 @@ DECLARE 4&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c4', -1); 5&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c5', -1); 6&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c6', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT4': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT4'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT4': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT4'; state ------- (0 rows) @@ -366,7 +367,7 @@ DECLARE #2retrieve> connection to server at "host_id", port port_id failed: FATAL: retrieve auth token is invalid -- cancel all 6 sessions -7: select pg_cancel_backend(pid) from pg_stat_activity, gp_endpoints() where sess_id = sessionid AND (cursorname ='c1' or cursorname='c2' or cursorname='c3' or cursorname ='c4' or cursorname='c5' or cursorname='c6'); +7: select pg_cancel_backend(pid) from pg_stat_activity, gp_get_endpoints() where sess_id = sessionid AND (cursorname ='c1' or cursorname='c2' or cursorname='c3' or cursorname ='c4' or cursorname='c5' or cursorname='c6'); pg_cancel_backend ------------------- t diff --git a/src/test/isolation2/output/parallel_retrieve_cursor/retrieve_quit_check.source b/src/test/isolation2/output/parallel_retrieve_cursor/retrieve_quit_check.source index 98675ab8522..ccb3dbe61f8 100644 --- a/src/test/isolation2/output/parallel_retrieve_cursor/retrieve_quit_check.source +++ b/src/test/isolation2/output/parallel_retrieve_cursor/retrieve_quit_check.source @@ -17,17 +17,17 @@ DECLARE DECLARE 1: DECLARE c3 PARALLEL 
RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 1 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 1 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; endpoint_id1 | token_id | host_id | port_id | READY endpoint_id1 | token_id | host_id | port_id | READY endpoint_id1 | token_id | host_id | port_id | READY (3 rows) -1: @post_run 'parse_endpoint_info 2 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c2'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c2'; endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY (3 rows) -1: @post_run 'parse_endpoint_info 3 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c3'; +1: @post_run 'parse_endpoint_info 3 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c3'; endpoint_id3 | token_id | host_id | port_id | READY endpoint_id3 | token_id | host_id | port_id | READY endpoint_id3 | token_id | host_id | port_id | READY @@ -114,7 +114,7 @@ ERROR: another session (pid: 49941) used the endpoint and completed retrieving 24 (10 rows) -2: SELECT cursorname, state FROM gp_endpoints(); +2: SELECT cursorname, state FROM gp_get_endpoints(); cursorname | state ------------+---------- c1 | ATTACHED @@ -128,7 +128,7 @@ ERROR: another session (pid: 49941) used the endpoint and completed retrieving c3 | READY (9 rows) -- verify endpoints on seg0 for c2 has been finishied -0U: SELECT cursorname,senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints(); +0U: SELECT cursorname,senderpid<>-1, receiverpid<>-1, state FROM 
gp_get_segment_endpoints(); cursorname | ?column? | ?column? | state ------------+----------+----------+---------- c1 | t | t | ATTACHED @@ -156,7 +156,7 @@ ERROR: another session (pid: 49941) used the endpoint and completed retrieving -- by this retrieve process should be cancelled. -- The endpoint on seg0 for c1 should firstly become to RELEASED (the retrieve process set it), -- and then was removed (during the endpoint QE cancelled) -2: SELECT cursorname, state FROM gp_endpoints(); +2: SELECT cursorname, state FROM gp_get_endpoints(); cursorname | state ------------+---------- c1 | READY @@ -170,7 +170,7 @@ ERROR: another session (pid: 49941) used the endpoint and completed retrieving (8 rows) -- verify endpoints for c1 is gone -0U: SELECT cursorname, senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints(); +0U: SELECT cursorname, senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints(); cursorname | ?column? | ?column? | state ------------+----------+----------+---------- c2 | f | t | FINISHED @@ -185,7 +185,7 @@ ERROR: canceling MPP operation: "Endpoint retrieve session is quitting. 
All unf ERROR: current transaction is aborted, commands ignored until end of transaction block 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c3', 0); ERROR: current transaction is aborted, commands ignored until end of transaction block -2: SELECT cursorname, state FROM gp_endpoints(); +2: SELECT cursorname, state FROM gp_get_endpoints(); cursorname | state ------------+------- (0 rows) @@ -193,7 +193,7 @@ ERROR: current transaction is aborted, commands ignored until end of transactio 1: END; END -2: SELECT cursorname, state FROM gp_endpoints(); +2: SELECT cursorname, state FROM gp_get_endpoints(); cursorname | state ------------+------- (0 rows) diff --git a/src/test/isolation2/output/parallel_retrieve_cursor/retrieve_quit_wait.source b/src/test/isolation2/output/parallel_retrieve_cursor/retrieve_quit_wait.source index b330089e9a7..843a2e50834 100644 --- a/src/test/isolation2/output/parallel_retrieve_cursor/retrieve_quit_wait.source +++ b/src/test/isolation2/output/parallel_retrieve_cursor/retrieve_quit_wait.source @@ -17,17 +17,17 @@ DECLARE DECLARE 1: DECLARE c3 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 1 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 1 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; endpoint_id1 | token_id | host_id | port_id | READY endpoint_id1 | token_id | host_id | port_id | READY endpoint_id1 | token_id | host_id | port_id | READY (3 rows) -1: @post_run 'parse_endpoint_info 2 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c2'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c2'; endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 
| token_id | host_id | port_id | READY (3 rows) -1: @post_run 'parse_endpoint_info 3 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c3'; +1: @post_run 'parse_endpoint_info 3 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c3'; endpoint_id3 | token_id | host_id | port_id | READY endpoint_id3 | token_id | host_id | port_id | READY endpoint_id3 | token_id | host_id | port_id | READY @@ -36,7 +36,7 @@ DECLARE -- Wait until the c2 has been fully retrieved 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', -1); -2: SELECT cursorname, state FROM gp_endpoints(); +2: SELECT cursorname, state FROM gp_get_endpoints(); cursorname | state ------------+------- c1 | READY @@ -220,7 +220,7 @@ DECLARE t (1 row) -0U: SELECT cursorname,senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints(); +0U: SELECT cursorname,senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints(); cursorname | ?column? | ?column? | state ------------+----------+----------+---------- c1 | t | t | ATTACHED @@ -241,7 +241,7 @@ DECLARE -- by this retrieve process should be cancelled. -- The endpoint on seg0 for c1 should firstly become to RELEASED (the retrieve process set it), -- and then was removed (during the endpoint QE cancelled) -2: SELECT cursorname, state FROM gp_endpoints(); +2: SELECT cursorname, state FROM gp_get_endpoints(); cursorname | state ------------+---------- c1 | ATTACHED @@ -258,7 +258,7 @@ DECLARE 1: END; ERROR: canceling MPP operation: "Endpoint retrieve session is quitting. All unfinished parallel retrieve cursors on the session will be terminated." 
(seg0 192.168.235.128:7002 pid=69967) -2: SELECT cursorname, state FROM gp_endpoints(); +2: SELECT cursorname, state FROM gp_get_endpoints(); cursorname | state ------------+------- (0 rows) @@ -273,17 +273,17 @@ DECLARE DECLARE 1: DECLARE c6 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 4 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c4'; +1: @post_run 'parse_endpoint_info 4 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c4'; endpoint_id4 | token_id | host_id | port_id | READY endpoint_id4 | token_id | host_id | port_id | READY endpoint_id4 | token_id | host_id | port_id | READY (3 rows) -1: @post_run 'parse_endpoint_info 5 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c5'; +1: @post_run 'parse_endpoint_info 5 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c5'; endpoint_id5 | token_id | host_id | port_id | READY endpoint_id5 | token_id | host_id | port_id | READY endpoint_id5 | token_id | host_id | port_id | READY (3 rows) -1: @post_run 'parse_endpoint_info 6 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c6'; +1: @post_run 'parse_endpoint_info 6 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c6'; endpoint_id6 | token_id | host_id | port_id | READY endpoint_id6 | token_id | host_id | port_id | READY endpoint_id6 | token_id | host_id | port_id | READY @@ -455,7 +455,7 @@ DECLARE (25 rows) -- skip TOKEN3 in this session -0U: SELECT cursorname,senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints(); +0U: SELECT cursorname,senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints(); cursorname | ?column? | ?column? 
| state ------------+----------+----------+---------- c4 | t | t | ATTACHED @@ -469,7 +469,7 @@ DECLARE ERROR: canceling MPP operation: "Endpoint retrieve session is quitting. All unfinished parallel retrieve cursors on the session will be terminated." (seg0 192.168.235.128:7002 pid=70221) -- All endpoints should be removed since error happened. -2: SELECT cursorname, state FROM gp_endpoints(); +2: SELECT cursorname, state FROM gp_get_endpoints(); cursorname | state ------------+------- (0 rows) diff --git a/src/test/isolation2/output/parallel_retrieve_cursor/security.source b/src/test/isolation2/output/parallel_retrieve_cursor/security.source index 5f33e046087..24c492f6732 100644 --- a/src/test/isolation2/output/parallel_retrieve_cursor/security.source +++ b/src/test/isolation2/output/parallel_retrieve_cursor/security.source @@ -26,7 +26,7 @@ CREATE BEGIN 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 1 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 1 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; endpoint_id1 | token_id | host_id | port_id | READY endpoint_id1 | token_id | host_id | port_id | READY endpoint_id1 | token_id | host_id | port_id | READY @@ -82,15 +82,15 @@ ERROR: This is a retrieve connection, but the query is not a RETRIEVE. 
-- Test: Different illegal endpoints always lead to an error ---- invalid endpoints 1R: RETRIEVE ALL FROM ENDPOINT abc; -ERROR: the endpoint abc does not exist in the session +ERROR: the endpoint abc does not exist for session id xxx 1R: RETRIEVE ALL FROM ENDPOINT 123; ERROR: syntax error at or near "123" LINE 1: RETRIEVE ALL FROM ENDPOINT 123; ^ 1R: RETRIEVE ALL FROM ENDPOINT tk1122; -ERROR: the endpoint tk1122 does not exist in the session +ERROR: the endpoint tk1122 does not exist for session id xxx 1R: RETRIEVE ALL FROM ENDPOINT tktt223344556677889900112233445566; -ERROR: the endpoint tktt223344556677889900112233445566 does not exist in the session +ERROR: the endpoint tktt223344556677889900112233445566 does not exist for session id xxx -- Retrieve data. *R: @pre_run 'set_endpoint_variable @ENDPOINT1': RETRIEVE 10 FROM ENDPOINT "@ENDPOINT1"; diff --git a/src/test/isolation2/output/parallel_retrieve_cursor/special_query.source b/src/test/isolation2/output/parallel_retrieve_cursor/special_query.source index 33dd1a94ca5..bd0bd9d9ab0 100644 --- a/src/test/isolation2/output/parallel_retrieve_cursor/special_query.source +++ b/src/test/isolation2/output/parallel_retrieve_cursor/special_query.source @@ -29,7 +29,7 @@ SELECT make_record(x) FROM t1; BEGIN 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT make_record(x) FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 1 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 1 1 2 3 4': SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; endpoint_id1 | token_id | host_id | port_id | READY endpoint_id1 | token_id | host_id | port_id | READY endpoint_id1 | token_id | host_id | port_id | READY @@ -42,7 +42,7 @@ DECLARE (1 row) 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT1': SELECT state FROM gp_segment_endpoints() WHERE 
endpointname='@ENDPOINT1'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT1': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT1'; state ------- (0 rows) @@ -144,7 +144,7 @@ BEGIN (1 row) 2: DECLARE c2 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t2 join t2 t12 on true; DECLARE -2: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; state ------- READY diff --git a/src/test/isolation2/output/parallel_retrieve_cursor/status_check.source b/src/test/isolation2/output/parallel_retrieve_cursor/status_check.source index 4f9a55498c7..fd7fb2e85eb 100644 --- a/src/test/isolation2/output/parallel_retrieve_cursor/status_check.source +++ b/src/test/isolation2/output/parallel_retrieve_cursor/status_check.source @@ -1,4 +1,4 @@ --- @Description Tests the state for pg_endpoints AND gp_segment_endpoints(), focus in nowait mode +-- @Description Tests the state for pg_endpoints AND gp_get_segment_endpoints(), focus in nowait mode -- need to fault injection to gp_wait_parallel_retrieve_cursor() -- DROP TABLE IF EXISTS t1; @@ -13,7 +13,7 @@ INSERT 100 BEGIN 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 1 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 1 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; endpoint_id1 | token_id | host_id | port_id | READY endpoint_id1 | token_id | host_id | port_id | READY endpoint_id1 | token_id | host_id | port_id | READY @@ -24,7 +24,7 @@ DECLARE f (1 row) -*U: @pre_run 'set_endpoint_variable @ENDPOINT1': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT1'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT1': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT1'; state ------- (0 rows) @@ -167,26 +167,26 @@ DECLARE 
1: CLOSE c1; CLOSE -- check no endpoint info -1: SELECT auth_token,state FROM gp_endpoints() WHERE cursorname='c1'; +1: SELECT auth_token,state FROM gp_get_endpoints() WHERE cursorname='c1'; auth_token | state ------------+------- (0 rows) -- check no token info on QE after close PARALLEL RETRIEVE CURSOR -*U: SELECT * FROM gp_segment_endpoints() WHERE cursorname='c1'; - auth_token | databaseid | senderpid | receiverpid | state | dbid | sessionid | userid | endpointname | cursorname -------------+------------+-----------+-------------+-------+------+-----------+--------+--------------+------------ +*U: SELECT * FROM gp_get_segment_endpoints() WHERE cursorname='c1'; + auth_token | databaseid | senderpid | receiverpid | state | gp_segment_id | sessionid | username | endpointname | cursorname +------------+------------+-----------+-------------+-------+---------------+-----------+----------+--------------+------------ (0 rows) - auth_token | databaseid | senderpid | receiverpid | state | dbid | sessionid | userid | endpointname | cursorname -------------+------------+-----------+-------------+-------+------+-----------+--------+--------------+------------ + auth_token | databaseid | senderpid | receiverpid | state | gp_segment_id | sessionid | username | endpointname | cursorname +------------+------------+-----------+-------------+-------+---------------+-----------+----------+--------------+------------ (0 rows) - auth_token | databaseid | senderpid | receiverpid | state | dbid | sessionid | userid | endpointname | cursorname -------------+------------+-----------+-------------+-------+------+-----------+--------+--------------+------------ + auth_token | databaseid | senderpid | receiverpid | state | gp_segment_id | sessionid | username | endpointname | cursorname +------------+------------+-----------+-------------+-------+---------------+-----------+----------+--------------+------------ (0 rows) - auth_token | databaseid | senderpid | receiverpid | state | dbid | 
sessionid | userid | endpointname | cursorname -------------+------------+-----------+-------------+-------+------+-----------+--------+--------------+------------ + auth_token | databaseid | senderpid | receiverpid | state | gp_segment_id | sessionid | username | endpointname | cursorname +------------+------------+-----------+-------------+-------+---------------+-----------+----------+--------------+------------ (0 rows) -- error out for closed cursor @@ -200,7 +200,7 @@ ROLLBACK BEGIN 1: DECLARE c2 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 2 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c2'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c2'; endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY @@ -227,14 +227,14 @@ DECLARE (1 row) -- check initial state after "CHECK PARALLEL RETRIEVE CURSOR" -2: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; state ---------- ATTACHED READY READY (3 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c2'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c2'; ?column? | ?column? | state ----------+----------+------- (0 rows) @@ -309,14 +309,14 @@ DECLARE 95 98 (37 rows) -2: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; state ---------- ATTACHED FINISHED READY (3 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c2'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c2'; ?column? | ?column? 
| state ----------+----------+------- (0 rows) @@ -399,14 +399,14 @@ DECLARE 96 100 (25 rows) -2: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; state ---------- FINISHED FINISHED FINISHED (3 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c2'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c2'; ?column? | ?column? | state ----------+----------+------- (0 rows) @@ -435,26 +435,26 @@ DECLARE COMMIT -- check the cursor auto closed when transaction closed -- check no endpoint info -1: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; state ------- (0 rows) -- check no token info on QE after close PARALLEL RETRIEVE CURSOR -*U: SELECT * FROM gp_segment_endpoints() WHERE cursorname='c2'; - auth_token | databaseid | senderpid | receiverpid | state | dbid | sessionid | userid | endpointname | cursorname -------------+------------+-----------+-------------+-------+------+-----------+--------+--------------+------------ +*U: SELECT * FROM gp_get_segment_endpoints() WHERE cursorname='c2'; + auth_token | databaseid | senderpid | receiverpid | state | gp_segment_id | sessionid | username | endpointname | cursorname +------------+------------+-----------+-------------+-------+---------------+-----------+----------+--------------+------------ (0 rows) - auth_token | databaseid | senderpid | receiverpid | state | dbid | sessionid | userid | endpointname | cursorname -------------+------------+-----------+-------------+-------+------+-----------+--------+--------------+------------ + auth_token | databaseid | senderpid | receiverpid | state | gp_segment_id | sessionid | username | endpointname | cursorname +------------+------------+-----------+-------------+-------+---------------+-----------+----------+--------------+------------ (0 
rows) - auth_token | databaseid | senderpid | receiverpid | state | dbid | sessionid | userid | endpointname | cursorname -------------+------------+-----------+-------------+-------+------+-----------+--------+--------------+------------ + auth_token | databaseid | senderpid | receiverpid | state | gp_segment_id | sessionid | username | endpointname | cursorname +------------+------------+-----------+-------------+-------+---------------+-----------+----------+--------------+------------ (0 rows) - auth_token | databaseid | senderpid | receiverpid | state | dbid | sessionid | userid | endpointname | cursorname -------------+------------+-----------+-------------+-------+------+-----------+--------+--------------+------------ + auth_token | databaseid | senderpid | receiverpid | state | gp_segment_id | sessionid | username | endpointname | cursorname +------------+------------+-----------+-------------+-------+---------------+-----------+----------+--------------+------------ (0 rows) -- error out for closed cursor @@ -468,7 +468,7 @@ ROLLBACK BEGIN 1: DECLARE c3 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 3 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c3'; +1: @post_run 'parse_endpoint_info 3 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c3'; endpoint_id3 | token_id | host_id | port_id | READY endpoint_id3 | token_id | host_id | port_id | READY endpoint_id3 | token_id | host_id | port_id | READY @@ -541,7 +541,7 @@ DETAIL: An endpoint can only be attached by one retrieving session. 
98 (37 rows) -- get senderpid which is endpoint execution backend -0U: @post_run 'get_tuple_cell PID31 1 1 ; create_sub "$PID31[ \t]*" senderpid31': SELECT senderpid, receiverpid<>-1, state FROM gp_segment_endpoints(); +0U: @post_run 'get_tuple_cell PID31 1 1 ; create_sub "$PID31[ \t]*" senderpid31': SELECT senderpid, receiverpid<>-1, state FROM gp_get_segment_endpoints(); senderpid | ?column? | state -----------+----------+---------- senderpid31| t | ATTACHED @@ -558,7 +558,7 @@ DETAIL: An endpoint can only be attached by one retrieving session. ---------- (1 row) -2: SELECT state FROM gp_endpoints() WHERE cursorname='c3'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c3'; state ---------- FINISHED @@ -567,11 +567,11 @@ DETAIL: An endpoint can only be attached by one retrieving session. 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c3', 0); ERROR: canceling MPP operation (seg0 192.168.235.128:7002 pid=67934) -- check no endpoint info left -2: SELECT state FROM gp_endpoints() WHERE cursorname='c3'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c3'; state ------- (0 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c3'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c3'; ?column? | ?column? | state ----------+----------+------- (0 rows) @@ -593,11 +593,11 @@ ERROR: current transaction is aborted, commands ignored until end of transactio 1: ROLLBACK; ROLLBACK -- check no endpoint info -2: SELECT state FROM gp_endpoints() WHERE cursorname='c3'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c3'; state ------- (0 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c3'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c3'; ?column? | ?column? 
| state ----------+----------+------- (0 rows) @@ -619,7 +619,7 @@ ROLLBACK BEGIN 1: DECLARE c4 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 4 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c4'; +1: @post_run 'parse_endpoint_info 4 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c4'; endpoint_id4 | token_id | host_id | port_id | READY endpoint_id4 | token_id | host_id | port_id | READY endpoint_id4 | token_id | host_id | port_id | READY @@ -686,7 +686,7 @@ DECLARE 98 (37 rows) -- get senderpid which is endpoint execution backend -0U: @post_run 'get_tuple_cell PID41 1 1 ; create_sub "${PID41}[ \t]*" senderpid41': SELECT senderpid, receiverpid<>-1, state FROM gp_segment_endpoints(); +0U: @post_run 'get_tuple_cell PID41 1 1 ; create_sub "${PID41}[ \t]*" senderpid41': SELECT senderpid, receiverpid<>-1, state FROM gp_get_segment_endpoints(); senderpid | ?column? | state -----------+----------+---------- senderpid41| t | ATTACHED @@ -706,7 +706,7 @@ DECLARE ---------- (1 row) -2: SELECT state FROM gp_endpoints() WHERE cursorname='c4'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c4'; ERROR: Error on receive from seg0 10.34.58.56:25432 pid=41925: server closed the connection unexpectedly This probably means the server terminated abnormally before or while processing the request. @@ -716,11 +716,11 @@ ERROR: Error on receive from seg0 192.168.235.128:7002 pid=68097: server closed before or while processing the request. -- check no endpoint info left 2q: ... 
-2: SELECT state FROM gp_endpoints() WHERE cursorname='c4'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c4'; state ------- (0 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c4'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c4'; ?column? | ?column? | state ----------+----------+------- (0 rows) @@ -742,11 +742,11 @@ ERROR: current transaction is aborted, commands ignored until end of transactio 1: ROLLBACK; ROLLBACK -- check no endpoint info -2: SELECT state FROM gp_endpoints() WHERE cursorname='c4'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c4'; state ------- (0 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c4'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c4'; ?column? | ?column? | state ----------+----------+------- (0 rows) @@ -769,7 +769,7 @@ ROLLBACK BEGIN 1: DECLARE c5 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 5 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c5'; +1: @post_run 'parse_endpoint_info 5 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c5'; endpoint_id5 | token_id | host_id | port_id | READY endpoint_id5 | token_id | host_id | port_id | READY endpoint_id5 | token_id | host_id | port_id | READY @@ -796,7 +796,7 @@ DECLARE (10 rows) -- 1R still bind to Test4 session, so can not retrieve from current endpoint. 1R: @pre_run 'set_endpoint_variable @ENDPOINT5': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT5"; -ERROR: the endpoint endpoint_id5 does not exist in the session +ERROR: the endpoint endpoint_id5 does not exist for session id xxx -- Since seg1 retrieve session is bind to Test4 session. And Test4 session get killed. We need to restart it. 1Rq: ... 
1R: @pre_run 'set_endpoint_variable @ENDPOINT5': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT5"; @@ -841,7 +841,7 @@ ERROR: the endpoint endpoint_id5 does not exist in the session 98 (37 rows) -- get senderpid which is endpoint execution backend -0U: @post_run 'get_tuple_cell PID51 1 1 ; create_sub "${PID51}[ \t]*" senderpid51': SELECT senderpid, receiverpid<>-1, state FROM gp_segment_endpoints(); +0U: @post_run 'get_tuple_cell PID51 1 1 ; create_sub "${PID51}[ \t]*" senderpid51': SELECT senderpid, receiverpid<>-1, state FROM gp_get_segment_endpoints(); senderpid | ?column? | state -----------+----------+---------- senderpid51| t | ATTACHED @@ -858,7 +858,7 @@ ERROR: the endpoint endpoint_id5 does not exist in the session ---------- (1 row) -2: SELECT state FROM gp_endpoints() WHERE cursorname='c5'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c5'; state ---------- FINISHED @@ -867,11 +867,11 @@ ERROR: the endpoint endpoint_id5 does not exist in the session 1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c5', 0); ERROR: terminating connection due to administrator command (seg0 192.168.235.128:7002 pid=68210) -- check no endpoint info left -2: SELECT state FROM gp_endpoints() WHERE cursorname='c5'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c5'; state ------- (0 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c5'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c5'; ?column? | ?column? 
| state ----------+----------+------- (0 rows) @@ -893,11 +893,11 @@ ERROR: current transaction is aborted, commands ignored until end of transactio 1: ROLLBACK; ROLLBACK -- check no endpoint info -2: SELECT state FROM gp_endpoints() WHERE cursorname='c5'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c5'; state ------- (0 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c5'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c5'; ?column? | ?column? | state ----------+----------+------- (0 rows) @@ -930,7 +930,7 @@ ROLLBACK BEGIN 1: DECLARE c6 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 6 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c6'; +1: @post_run 'parse_endpoint_info 6 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c6'; endpoint_id6 | token_id | host_id | port_id | READY endpoint_id6 | token_id | host_id | port_id | READY endpoint_id6 | token_id | host_id | port_id | READY @@ -1009,11 +1009,11 @@ DECLARE 1<: <... completed> ERROR: canceling statement due to user request -- check no endpoint info left -2: SELECT state FROM gp_endpoints() WHERE cursorname='c6'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c6'; state ------- (0 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c6'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c6'; ?column? | ?column? 
| state ----------+----------+------- (0 rows) @@ -1035,11 +1035,11 @@ ERROR: current transaction is aborted, commands ignored until end of transactio 1: ROLLBACK; ROLLBACK -- check no endpoint info -2: SELECT state FROM gp_endpoints() WHERE cursorname='c6'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c6'; state ------- (0 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c6'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c6'; ?column? | ?column? | state ----------+----------+------- (0 rows) @@ -1072,7 +1072,7 @@ ROLLBACK BEGIN 1: DECLARE c61 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 61 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c61'; +1: @post_run 'parse_endpoint_info 61 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c61'; endpoint_id61 | token_id | host_id | port_id | READY endpoint_id61 | token_id | host_id | port_id | READY endpoint_id61 | token_id | host_id | port_id | READY @@ -1151,11 +1151,11 @@ DECLARE 1<: <... completed> ERROR: canceling statement due to user request -- check no endpoint info left -2: SELECT state FROM gp_endpoints() WHERE cursorname='c61'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c61'; state ------- (0 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c61'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c61'; ?column? | ?column? | state ----------+----------+------- (0 rows) @@ -1174,11 +1174,11 @@ ERROR: canceling statement due to user request -- quit the session of 'CHECK PARALLEL RETRIEVE CURSOR' and keep other session connected 1q: ... 
-- check no endpoint info -2: SELECT state FROM gp_endpoints() WHERE cursorname='c61'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c61'; state ------- (0 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c61'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c61'; ?column? | ?column? | state ----------+----------+------- (0 rows) @@ -1212,7 +1212,7 @@ ERROR: canceling statement due to user request BEGIN 1: DECLARE c7 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 7 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c7'; +1: @post_run 'parse_endpoint_info 7 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c7'; endpoint_id7 | token_id | host_id | port_id | READY endpoint_id7 | token_id | host_id | port_id | READY endpoint_id7 | token_id | host_id | port_id | READY @@ -1279,7 +1279,7 @@ DECLARE 95 98 (37 rows) -2U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c7'; +2U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c7'; ?column? | ?column? | state ----------+----------+------- t | f | READY @@ -1302,11 +1302,11 @@ server closed the connection unexpectedly 2q: ... -1Uq: ... -- check no endpoint info left -2: SELECT state FROM gp_endpoints() WHERE cursorname='c7'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c7'; state ------- (0 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c7'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c7'; ?column? | ?column? 
| state ----------+----------+------- (0 rows) @@ -1328,11 +1328,11 @@ ERROR: cursor "c7" does not exist 1: ROLLBACK; ROLLBACK -- check no endpoint info -2: SELECT state FROM gp_endpoints() WHERE cursorname='c7'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c7'; state ------- (0 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c7'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c7'; ?column? | ?column? | state ----------+----------+------- (0 rows) @@ -1354,7 +1354,7 @@ ROLLBACK BEGIN 1: DECLARE c8 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -1: @post_run 'get_tuple_cell SESSION81 1 1 ; create_match_sub_with_spaces $SESSION81 session81' : SELECT sessionid,state FROM gp_session_endpoints() WHERE cursorname='c8'; +1: @post_run 'get_tuple_cell SESSION81 1 1 ; create_match_sub_with_spaces $SESSION81 session81' : SELECT sessionid,state FROM gp_get_session_endpoints() WHERE cursorname='c8'; sessionid | state -----------+------- session81 | READY @@ -1366,15 +1366,15 @@ DECLARE BEGIN 2: DECLARE c8 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -2: @post_run 'get_tuple_cell SESSION82 1 1 ; create_match_sub_with_spaces $SESSION82 session82' : SELECT sessionid,state FROM gp_session_endpoints() WHERE cursorname='c8'; +2: @post_run 'get_tuple_cell SESSION82 1 1 ; create_match_sub_with_spaces $SESSION82 session82' : SELECT sessionid,state FROM gp_get_session_endpoints() WHERE cursorname='c8'; sessionid | state -----------+------- session82 | READY session82 | READY session82 | READY (3 rows) --- Session 2 can see all cursors with gp_endpoints(). -2: SELECT sessionid,state FROM gp_endpoints() WHERE cursorname='c8' order by sessionid; +-- Session 2 can see all cursors with gp_get_endpoints(). 
+2: SELECT sessionid,state FROM gp_get_endpoints() WHERE cursorname='c8' order by sessionid; sessionid | state -----------+------- session82 | READY diff --git a/src/test/isolation2/output/parallel_retrieve_cursor/status_wait.source b/src/test/isolation2/output/parallel_retrieve_cursor/status_wait.source index 21d46b76c84..9ec2cf6e17e 100644 --- a/src/test/isolation2/output/parallel_retrieve_cursor/status_wait.source +++ b/src/test/isolation2/output/parallel_retrieve_cursor/status_wait.source @@ -1,4 +1,4 @@ --- @Description Tests the state for pg_endpoints AND gp_segment_endpoints(), focus in wait mode +-- @Description Tests the state for pg_endpoints AND gp_get_segment_endpoints(), focus in wait mode -- DROP TABLE IF EXISTS t1; DROP @@ -12,14 +12,14 @@ INSERT 100 BEGIN 1: DECLARE c1 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 1 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c1'; +1: @post_run 'parse_endpoint_info 1 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c1'; endpoint_id1 | token_id | host_id | port_id | READY endpoint_id1 | token_id | host_id | port_id | READY endpoint_id1 | token_id | host_id | port_id | READY (3 rows) 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c1', -1); -*U: @pre_run 'set_endpoint_variable @ENDPOINT1': SELECT state FROM gp_segment_endpoints() WHERE endpointname='@ENDPOINT1'; +*U: @pre_run 'set_endpoint_variable @ENDPOINT1': SELECT state FROM gp_get_segment_endpoints() WHERE endpointname='@ENDPOINT1'; state ------- (0 rows) @@ -162,26 +162,26 @@ DECLARE 1: CLOSE c1; CLOSE -- check no endpoint info -1: SELECT auth_token,state FROM gp_endpoints() WHERE cursorname='c1'; +1: SELECT auth_token,state FROM gp_get_endpoints() WHERE cursorname='c1'; auth_token | state ------------+------- (0 rows) -- check no token info on QE after close PARALLEL RETRIEVE CURSOR -*U: SELECT * FROM 
gp_segment_endpoints() WHERE cursorname='c1'; - auth_token | databaseid | senderpid | receiverpid | state | dbid | sessionid | userid | endpointname | cursorname -------------+------------+-----------+-------------+-------+------+-----------+--------+--------------+------------ +*U: SELECT * FROM gp_get_segment_endpoints() WHERE cursorname='c1'; + auth_token | databaseid | senderpid | receiverpid | state | gp_segment_id | sessionid | username | endpointname | cursorname +------------+------------+-----------+-------------+-------+---------------+-----------+----------+--------------+------------ (0 rows) - auth_token | databaseid | senderpid | receiverpid | state | dbid | sessionid | userid | endpointname | cursorname -------------+------------+-----------+-------------+-------+------+-----------+--------+--------------+------------ + auth_token | databaseid | senderpid | receiverpid | state | gp_segment_id | sessionid | username | endpointname | cursorname +------------+------------+-----------+-------------+-------+---------------+-----------+----------+--------------+------------ (0 rows) - auth_token | databaseid | senderpid | receiverpid | state | dbid | sessionid | userid | endpointname | cursorname -------------+------------+-----------+-------------+-------+------+-----------+--------+--------------+------------ + auth_token | databaseid | senderpid | receiverpid | state | gp_segment_id | sessionid | username | endpointname | cursorname +------------+------------+-----------+-------------+-------+---------------+-----------+----------+--------------+------------ (0 rows) - auth_token | databaseid | senderpid | receiverpid | state | dbid | sessionid | userid | endpointname | cursorname -------------+------------+-----------+-------------+-------+------+-----------+--------+--------------+------------ + auth_token | databaseid | senderpid | receiverpid | state | gp_segment_id | sessionid | username | endpointname | cursorname 
+------------+------------+-----------+-------------+-------+---------------+-----------+----------+--------------+------------ (0 rows) -- error out for closed cursor @@ -195,7 +195,7 @@ ROLLBACK BEGIN 1: DECLARE c2 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 2 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c2'; +1: @post_run 'parse_endpoint_info 2 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c2'; endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY endpoint_id2 | token_id | host_id | port_id | READY @@ -218,14 +218,14 @@ DECLARE 1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c2', -1); -- check initial state after "CHECK PARALLEL RETRIEVE CURSOR" -2: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; state ---------- ATTACHED READY READY (3 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c2'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c2'; ?column? | ?column? | state ----------+----------+------- (0 rows) @@ -300,14 +300,14 @@ DECLARE 95 98 (37 rows) -2: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; state ---------- ATTACHED FINISHED READY (3 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c2'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c2'; ?column? | ?column? 
| state ----------+----------+------- (0 rows) @@ -390,14 +390,14 @@ DECLARE 96 100 (25 rows) -2: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; state ---------- FINISHED FINISHED FINISHED (3 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c2'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c2'; ?column? | ?column? | state ----------+----------+------- (0 rows) @@ -426,26 +426,26 @@ DECLARE COMMIT -- check the cursor auto closed when transaction closed -- check no endpoint info -1: SELECT state FROM gp_endpoints() WHERE cursorname='c2'; +1: SELECT state FROM gp_get_endpoints() WHERE cursorname='c2'; state ------- (0 rows) -- check no token info on QE after close PARALLEL RETRIEVE CURSOR -*U: SELECT * FROM gp_segment_endpoints() WHERE cursorname='c2'; - auth_token | databaseid | senderpid | receiverpid | state | dbid | sessionid | userid | endpointname | cursorname -------------+------------+-----------+-------------+-------+------+-----------+--------+--------------+------------ +*U: SELECT * FROM gp_get_segment_endpoints() WHERE cursorname='c2'; + auth_token | databaseid | senderpid | receiverpid | state | gp_segment_id | sessionid | username | endpointname | cursorname +------------+------------+-----------+-------------+-------+---------------+-----------+----------+--------------+------------ (0 rows) - auth_token | databaseid | senderpid | receiverpid | state | dbid | sessionid | userid | endpointname | cursorname -------------+------------+-----------+-------------+-------+------+-----------+--------+--------------+------------ + auth_token | databaseid | senderpid | receiverpid | state | gp_segment_id | sessionid | username | endpointname | cursorname +------------+------------+-----------+-------------+-------+---------------+-----------+----------+--------------+------------ (0 
rows) - auth_token | databaseid | senderpid | receiverpid | state | dbid | sessionid | userid | endpointname | cursorname -------------+------------+-----------+-------------+-------+------+-----------+--------+--------------+------------ + auth_token | databaseid | senderpid | receiverpid | state | gp_segment_id | sessionid | username | endpointname | cursorname +------------+------------+-----------+-------------+-------+---------------+-----------+----------+--------------+------------ (0 rows) - auth_token | databaseid | senderpid | receiverpid | state | dbid | sessionid | userid | endpointname | cursorname -------------+------------+-----------+-------------+-------+------+-----------+--------+--------------+------------ + auth_token | databaseid | senderpid | receiverpid | state | gp_segment_id | sessionid | username | endpointname | cursorname +------------+------------+-----------+-------------+-------+---------------+-----------+----------+--------------+------------ (0 rows) -- error out for closed cursor @@ -459,7 +459,7 @@ ROLLBACK BEGIN 1: DECLARE c3 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 3 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c3'; +1: @post_run 'parse_endpoint_info 3 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c3'; endpoint_id3 | token_id | host_id | port_id | READY endpoint_id3 | token_id | host_id | port_id | READY endpoint_id3 | token_id | host_id | port_id | READY @@ -528,7 +528,7 @@ DETAIL: An endpoint can only be attached by one retrieving session. 
98 (37 rows) -- get senderpid which is endpoint execution backend -0U: @post_run 'get_tuple_cell PID31 1 1 ; create_sub "$PID31[ \t]*" senderpid31': SELECT senderpid, receiverpid<>-1, state FROM gp_segment_endpoints(); +0U: @post_run 'get_tuple_cell PID31 1 1 ; create_sub "$PID31[ \t]*" senderpid31': SELECT senderpid, receiverpid<>-1, state FROM gp_get_segment_endpoints(); senderpid | ?column? | state -----------+----------+---------- senderpid31| t | ATTACHED @@ -543,11 +543,11 @@ DETAIL: An endpoint can only be attached by one retrieving session. 1<: <... completed> ERROR: canceling MPP operation -- check no endpoint info left -2: SELECT state FROM gp_endpoints() WHERE cursorname='c3'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c3'; state ------- (0 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c3'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c3'; ?column? | ?column? | state ----------+----------+------- (0 rows) @@ -569,11 +569,11 @@ ERROR: current transaction is aborted, commands ignored until end of transactio 1: ROLLBACK; ROLLBACK -- check no endpoint info -2: SELECT state FROM gp_endpoints() WHERE cursorname='c3'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c3'; state ------- (0 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c3'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c3'; ?column? | ?column? 
| state ----------+----------+------- (0 rows) @@ -595,7 +595,7 @@ ROLLBACK BEGIN 1: DECLARE c4 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 4 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c4'; +1: @post_run 'parse_endpoint_info 4 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c4'; endpoint_id4 | token_id | host_id | port_id | READY endpoint_id4 | token_id | host_id | port_id | READY endpoint_id4 | token_id | host_id | port_id | READY @@ -658,7 +658,7 @@ DECLARE 98 (37 rows) -- get senderpid which is endpoint execution backend -0U: @post_run 'get_tuple_cell PID41 1 1 ; create_sub "${PID41}[ \t]*" senderpid41': SELECT senderpid, receiverpid<>-1, state FROM gp_segment_endpoints(); +0U: @post_run 'get_tuple_cell PID41 1 1 ; create_sub "${PID41}[ \t]*" senderpid41': SELECT senderpid, receiverpid<>-1, state FROM gp_get_segment_endpoints(); senderpid | ?column? | state -----------+----------+---------- senderpid41| t | ATTACHED @@ -679,11 +679,11 @@ ERROR: Error on receive from seg0 10.34.50.67:25432 pid=12603: server closed th before or while processing the request. -- check no endpoint info left 2q: ... -2: SELECT state FROM gp_endpoints() WHERE cursorname='c4'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c4'; state ------- (0 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c4'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c4'; ?column? | ?column? 
| state ----------+----------+------- (0 rows) @@ -705,11 +705,11 @@ ERROR: current transaction is aborted, commands ignored until end of transactio 1: ROLLBACK; ROLLBACK -- check no endpoint info -2: SELECT state FROM gp_endpoints() WHERE cursorname='c4'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c4'; state ------- (0 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c4'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c4'; ?column? | ?column? | state ----------+----------+------- (0 rows) @@ -732,7 +732,7 @@ ROLLBACK BEGIN 1: DECLARE c5 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 5 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c5'; +1: @post_run 'parse_endpoint_info 5 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c5'; endpoint_id5 | token_id | host_id | port_id | READY endpoint_id5 | token_id | host_id | port_id | READY endpoint_id5 | token_id | host_id | port_id | READY @@ -755,7 +755,7 @@ DECLARE (10 rows) -- 1R still bind to Test4 session, so can not retrieve from current endpoint. 1R: @pre_run 'set_endpoint_variable @ENDPOINT5': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT5"; -ERROR: the endpoint endpoint_id5 does not exist in the session +ERROR: the endpoint endpoint_id5 does not exist for session id xxx -- Since seg1 retrieve session is bind to Test4 session. And Test4 session get killed. We need to restart it. 1Rq: ... 
1R: @pre_run 'set_endpoint_variable @ENDPOINT5': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT5"; @@ -800,7 +800,7 @@ ERROR: the endpoint endpoint_id5 does not exist in the session 98 (37 rows) -- get senderpid which is endpoint execution backend -0U: @post_run 'get_tuple_cell PID51 1 1 ; create_sub "${PID51}[ \t]*" senderpid51': SELECT senderpid, receiverpid<>-1, state FROM gp_segment_endpoints(); +0U: @post_run 'get_tuple_cell PID51 1 1 ; create_sub "${PID51}[ \t]*" senderpid51': SELECT senderpid, receiverpid<>-1, state FROM gp_get_segment_endpoints(); senderpid | ?column? | state -----------+----------+---------- senderpid51| t | ATTACHED @@ -815,11 +815,11 @@ ERROR: the endpoint endpoint_id5 does not exist in the session 1<: <... completed> ERROR: terminating connection due to administrator command (seg0 10.34.50.67:25432 pid=12905) -- check no endpoint info left -2: SELECT state FROM gp_endpoints() WHERE cursorname='c5'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c5'; state ------- (0 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c5'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c5'; ?column? | ?column? | state ----------+----------+------- (0 rows) @@ -841,11 +841,11 @@ ERROR: current transaction is aborted, commands ignored until end of transactio 1: ROLLBACK; ROLLBACK -- check no endpoint info -2: SELECT state FROM gp_endpoints() WHERE cursorname='c5'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c5'; state ------- (0 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c5'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c5'; ?column? | ?column? 
| state ----------+----------+------- (0 rows) @@ -867,7 +867,7 @@ ROLLBACK BEGIN 1: DECLARE c6 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 6 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c6'; +1: @post_run 'parse_endpoint_info 6 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c6'; endpoint_id6 | token_id | host_id | port_id | READY endpoint_id6 | token_id | host_id | port_id | READY endpoint_id6 | token_id | host_id | port_id | READY @@ -945,11 +945,11 @@ DECLARE 1<: <... completed> ERROR: canceling statement due to user request -- check no endpoint info left -2: SELECT state FROM gp_endpoints() WHERE cursorname='c6'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c6'; state ------- (0 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c6'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c6'; ?column? | ?column? | state ----------+----------+------- (0 rows) @@ -971,11 +971,11 @@ ERROR: current transaction is aborted, commands ignored until end of transactio 1: ROLLBACK; ROLLBACK -- check no endpoint info -2: SELECT state FROM gp_endpoints() WHERE cursorname='c6'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c6'; state ------- (0 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c6'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c6'; ?column? | ?column? 
| state ----------+----------+------- (0 rows) @@ -997,7 +997,7 @@ ROLLBACK BEGIN 1: DECLARE c61 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 61 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c61'; +1: @post_run 'parse_endpoint_info 61 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c61'; endpoint_id61 | token_id | host_id | port_id | READY endpoint_id61 | token_id | host_id | port_id | READY endpoint_id61 | token_id | host_id | port_id | READY @@ -1075,11 +1075,11 @@ DECLARE 1<: <... completed> ERROR: canceling statement due to user request -- check no endpoint info left -2: SELECT state FROM gp_endpoints() WHERE cursorname='c61'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c61'; state ------- (0 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c61'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c61'; ?column? | ?column? | state ----------+----------+------- (0 rows) @@ -1098,11 +1098,11 @@ ERROR: canceling statement due to user request -- quit the session of 'CHECK PARALLEL RETRIEVE CURSOR' and keep other session connected 1q: ... -- check no endpoint info -2: SELECT state FROM gp_endpoints() WHERE cursorname='c61'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c61'; state ------- (0 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c61'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c61'; ?column? | ?column? 
| state ----------+----------+------- (0 rows) @@ -1126,7 +1126,7 @@ ERROR: canceling statement due to user request BEGIN 1: DECLARE c7 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -1: @post_run 'parse_endpoint_info 7 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_endpoints() WHERE cursorname='c7'; +1: @post_run 'parse_endpoint_info 7 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c7'; endpoint_id7 | token_id | host_id | port_id | READY endpoint_id7 | token_id | host_id | port_id | READY endpoint_id7 | token_id | host_id | port_id | READY @@ -1194,7 +1194,7 @@ DECLARE 95 98 (37 rows) -2U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c7'; +2U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c7'; ?column? | ?column? | state ----------+----------+------- t | f | READY @@ -1216,11 +1216,11 @@ server closed the connection unexpectedly 2q: ... -1Uq: ... -- check no endpoint info left -2: SELECT state FROM gp_endpoints() WHERE cursorname='c7'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c7'; state ------- (0 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c7'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c7'; ?column? | ?column? | state ----------+----------+------- (0 rows) @@ -1242,11 +1242,11 @@ ERROR: cursor "c7" does not exist 1: ROLLBACK; ROLLBACK -- check no endpoint info -2: SELECT state FROM gp_endpoints() WHERE cursorname='c7'; +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c7'; state ------- (0 rows) -*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_segment_endpoints() WHERE cursorname='c7'; +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c7'; ?column? | ?column? 
| state ----------+----------+------- (0 rows) @@ -1268,7 +1268,7 @@ ROLLBACK BEGIN 1: DECLARE c8 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -1: @post_run 'get_tuple_cell SESSION81 1 1 ; create_match_sub_with_spaces $SESSION81 session81': SELECT sessionid,state FROM gp_session_endpoints() WHERE cursorname='c8'; +1: @post_run 'get_tuple_cell SESSION81 1 1 ; create_match_sub_with_spaces $SESSION81 session81': SELECT sessionid,state FROM gp_get_session_endpoints() WHERE cursorname='c8'; sessionid | state -----------+------- session81 | READY @@ -1280,15 +1280,15 @@ DECLARE BEGIN 2: DECLARE c8 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; DECLARE -2: @post_run 'get_tuple_cell SESSION82 1 1 ; create_match_sub_with_spaces $SESSION82 session82': SELECT sessionid,state FROM gp_session_endpoints() WHERE cursorname='c8'; +2: @post_run 'get_tuple_cell SESSION82 1 1 ; create_match_sub_with_spaces $SESSION82 session82': SELECT sessionid,state FROM gp_get_session_endpoints() WHERE cursorname='c8'; sessionid | state -----------+------- session82 | READY session82 | READY session82 | READY (3 rows) --- Session 2 can see all cursors with gp_endpoints(). -2: SELECT sessionid,state FROM gp_endpoints() WHERE cursorname='c8' order by sessionid; +-- Session 2 can see all cursors with gp_get_endpoints(). 
+2: SELECT sessionid,state FROM gp_get_endpoints() WHERE cursorname='c8' order by sessionid; sessionid | state -----------+------- session82 | READY @@ -1308,3 +1308,146 @@ CLOSE 2: END; END +---------- Test9: Cancel (using pg_cancel_backend(pid)) the process of 'CHECK PARALLEL RETRIEVE CURSOR' +1: BEGIN; +BEGIN +1: DECLARE c9 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; +DECLARE +1: @post_run 'parse_endpoint_info 9 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c9'; + endpoint_id9 | token_id | host_id | port_id | READY + endpoint_id9 | token_id | host_id | port_id | READY + endpoint_id9 | token_id | host_id | port_id | READY +(3 rows) + +1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c9', -1); +-- some endpoint retrieve partial results, some endpoint finished retrieving, some endpoint not start retrieving +0R: @pre_run 'set_endpoint_variable @ENDPOINT9': RETRIEVE 10 FROM ENDPOINT "@ENDPOINT9"; +ERROR: the endpoint endpoint_id9 does not exist for session id xxx +1R: @pre_run 'set_endpoint_variable @ENDPOINT9': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT9"; +ERROR: the endpoint endpoint_id9 does not exist for session id xxx +-- run pg_cancel_backend(pid) to cancel the endpoint execution backend, retrieve session still can work +2: select pg_cancel_backend(pid) from pg_stat_activity where query like 'SELECT * FROM gp_wait_parallel_retrieve_cursor(''c9'', -1);'; + pg_cancel_backend +------------------- + t +(1 row) +-- check it can cancel the "gp_wait_parallel_retrieve_cursor" +1<: <... completed> +ERROR: canceling statement due to user request +-- check no endpoint info left +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c9'; + state +------- +(0 rows) +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c9'; + ?column? | ?column? | state +----------+----------+------- +(0 rows) + + ?column? | ?column? 
| state +----------+----------+------- +(0 rows) + + ?column? | ?column? | state +----------+----------+------- +(0 rows) + + ?column? | ?column? | state +----------+----------+------- +(0 rows) +-- report error for EXECUTE canceled PARALLEL RETRIEVE CURSOR +1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c9', -1); +ERROR: current transaction is aborted, commands ignored until end of transaction block +1: ROLLBACK; +ROLLBACK +-- check no endpoint info +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c9'; + state +------- +(0 rows) + +---------- Test10: terminate (using pg_terminate_backend(pid)) the process of 'CHECK PARALLEL RETRIEVE CURSOR' +1: BEGIN; +BEGIN +1: DECLARE c10 PARALLEL RETRIEVE CURSOR FOR SELECT * FROM t1; +DECLARE +1: @post_run 'parse_endpoint_info 10 1 2 3 4' : SELECT endpointname,auth_token,hostname,port,state FROM gp_get_endpoints() WHERE cursorname='c10'; + endpoint_id10 | token_id | host_id | port_id | READY + endpoint_id10 | token_id | host_id | port_id | READY + endpoint_id10 | token_id | host_id | port_id | READY +(3 rows) +1&: SELECT * FROM gp_wait_parallel_retrieve_cursor('c10', -1); +-- some endpoint retrieve partial results, some endpoint finished retrieving, some endpoint not start retrieving +0R: @pre_run 'set_endpoint_variable @ENDPOINT10': RETRIEVE 10 FROM ENDPOINT "@ENDPOINT10"; +ERROR: the endpoint endpoint_id10 does not exist for session id xxx +1R: @pre_run 'set_endpoint_variable @ENDPOINT10': RETRIEVE ALL FROM ENDPOINT "@ENDPOINT10"; +ERROR: the endpoint endpoint_id10 does not exist for session id xxx +2U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c10'; + ?column? | ?column? | state +----------+----------+------- + t | f | READY +(1 row) +-- run ' pg_terminate_backend(pid)' to cancel the endpoint execution backend, retrieve session still can work +-- here need to sleep sometime to wait for endpoint QE backend to detect QD connection lost. 
+2: select pg_terminate_backend(pid) from pg_stat_activity where query like 'SELECT * FROM gp_wait_parallel_retrieve_cursor(''c10'', -1);'; + pg_terminate_backend +---------------------- + t +(1 row) +-- check it can cancel the "gp_wait_parallel_retrieve_cursor" +1<: <... completed> +FATAL: terminating connection due to administrator command +server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +-- quit all sessions on the master, because connect lost +1q: ... +2q: ... +-1Uq: ... +-- check no endpoint info left +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c10'; + state +------- +(0 rows) +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c10'; + ?column? | ?column? | state +----------+----------+------- +(0 rows) + + ?column? | ?column? | state +----------+----------+------- +(0 rows) + + ?column? | ?column? | state +----------+----------+------- +(0 rows) + + ?column? | ?column? | state +----------+----------+------- +(0 rows) +-- report error for EXECUTE canceled PARALLEL RETRIEVE CURSOR +1: SELECT * FROM gp_wait_parallel_retrieve_cursor('c10', -1); +ERROR: cursor "c10" does not exist +1: ROLLBACK; +ROLLBACK +-- check no endpoint info +2: SELECT state FROM gp_get_endpoints() WHERE cursorname='c10'; + state +------- +(0 rows) +*U: SELECT senderpid<>-1, receiverpid<>-1, state FROM gp_get_segment_endpoints() WHERE cursorname='c10'; + ?column? | ?column? | state +----------+----------+------- +(0 rows) + + ?column? | ?column? | state +----------+----------+------- +(0 rows) + + ?column? | ?column? | state +----------+----------+------- +(0 rows) + + ?column? | ?column? 
| state +----------+----------+------- +(0 rows) diff --git a/src/test/isolation2/test_parallel_retrieve_cursor_extended_query.c b/src/test/isolation2/test_parallel_retrieve_cursor_extended_query.c index 09f1e7ee0f9..e130c1c5ee7 100644 --- a/src/test/isolation2/test_parallel_retrieve_cursor_extended_query.c +++ b/src/test/isolation2/test_parallel_retrieve_cursor_extended_query.c @@ -223,7 +223,7 @@ exec_check_parallel_cursor(PGconn *master_conn, int isCheckFinish) { int result = 0; PGresult *res1; - const char *check_sql = "SELECT * FROM gp_wait_parallel_retrieve_cursor('myportal', 0);"; + const char *check_sql = "SELECT * FROM pg_catalog.gp_wait_parallel_retrieve_cursor('myportal', 0);"; printf("\n------ Begin checking parallel retrieve cursor status ------\n"); @@ -337,7 +337,7 @@ main(int argc, char **argv) /* * get the endpoints info of this PARALLEL RETRIEVE CURSOR */ - const char *sql1 = "select hostname,port,auth_token,endpointname from pg_catalog.gp_endpoints() where cursorname='myportal';"; + const char *sql1 = "select hostname,port,auth_token,endpointname from pg_catalog.gp_get_endpoints() where cursorname='myportal';"; printf("\nExec SQL on the coordinator:\n\t> %s\n", sql1); res1 = PQexec(master_conn, sql1); diff --git a/src/test/isolation2/test_parallel_retrieve_cursor_extended_query_error.c b/src/test/isolation2/test_parallel_retrieve_cursor_extended_query_error.c index 1bf731d4a89..eddbd8c45f3 100644 --- a/src/test/isolation2/test_parallel_retrieve_cursor_extended_query_error.c +++ b/src/test/isolation2/test_parallel_retrieve_cursor_extended_query_error.c @@ -226,7 +226,7 @@ exec_check_parallel_cursor(PGconn *master_conn, int isCheckFinish) { int result = 0; PGresult *res1; - const char *check_sql = "SELECT * FROM gp_wait_parallel_retrieve_cursor('myportal', 0);"; + const char *check_sql = "SELECT * FROM pg_catalog.gp_wait_parallel_retrieve_cursor('myportal', 0);"; printf("\n------ Begin checking parallel retrieve cursor status ------\n"); @@ -338,7 
+338,7 @@ main(int argc, char **argv) /* * get the endpoints info of this PARALLEL RETRIEVE CURSOR */ - const char *sql1 = "select hostname,port,auth_token,endpointname from pg_catalog.gp_endpoints() where cursorname='myportal';"; + const char *sql1 = "select hostname,port,auth_token,endpointname from pg_catalog.gp_get_endpoints() where cursorname='myportal';"; printf("\nExec SQL on Master:\n\t> %s\n", sql1); res1 = PQexec(master_conn, sql1); From aa0396fa48b65ae430c3770a5bd97b52a58ec5a6 Mon Sep 17 00:00:00 2001 From: Jimmy Yih Date: Thu, 3 Mar 2022 15:40:09 -0800 Subject: [PATCH 03/46] Fix regression about PREPARE TRANSACTION in utility-mode connections In GPDB, we do not allow users to use PREPARE TRANSACTION in regular and utility-mode connections to prevent any conflicts/issues with GPDB's distributed transaction manager that heavily utilizes two-phase commit. As part of the Postgres 10 merge into GPDB, a regression was introduced that allowed PREPARE TRANSACTION to be run in utility-mode connections. The error check was being bypassed because the TransactionStmt was not being properly obtained. The cause of this was due to an upstream Postgres refactor that introduced RawStmt which would wrap the TransactionStmt so the TransactionStmt typecast was being done on the wrong parse node (needs to be done on the RawStmt->stmt). Added simple regression test to make sure this regression doesn't occur again from future Postgres merges. Also disable some recovery TAP tests which use PREPARE TRANSACTION in utility-mode connections. 
Postgres commit reference (RawStmt refactor): https://github.com/postgres/postgres/commit/ab1f0c8225714aaa18d2f9ca4f80cd009f145421 --- src/backend/tcop/postgres.c | 9 ++++----- src/test/recovery/t/009_twophase.pl | 9 ++++++++- src/test/recovery/t/012_subtransactions.pl | 9 ++++++++- src/test/regress/expected/gp_prepared_xacts.out | 7 +++++++ src/test/regress/greenplum_schedule | 2 +- src/test/regress/sql/gp_prepared_xacts.sql | 6 ++++++ 6 files changed, 34 insertions(+), 8 deletions(-) create mode 100644 src/test/regress/expected/gp_prepared_xacts.out create mode 100644 src/test/regress/sql/gp_prepared_xacts.sql diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 48f8f159097..c06e0ed551c 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -1779,12 +1779,11 @@ exec_simple_query(const char *query_string) } /* - * If are connected in utility mode, disallow PREPARE TRANSACTION - * statements. + * GPDB: If we are connected in utility mode, disallow PREPARE + * TRANSACTION statements. */ - TransactionStmt *transStmt = (TransactionStmt *) parsetree; - if (Gp_role == GP_ROLE_UTILITY && IsA(parsetree, TransactionStmt) && - transStmt->kind == TRANS_STMT_PREPARE) + if (Gp_role == GP_ROLE_UTILITY && IsA(parsetree->stmt, TransactionStmt) && + ((TransactionStmt *) parsetree->stmt)->kind == TRANS_STMT_PREPARE) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), diff --git a/src/test/recovery/t/009_twophase.pl b/src/test/recovery/t/009_twophase.pl index 900d181788c..18a4cfcece3 100644 --- a/src/test/recovery/t/009_twophase.pl +++ b/src/test/recovery/t/009_twophase.pl @@ -7,7 +7,14 @@ use PostgresNode; use TestLib; -use Test::More tests => 24; + +# GPDB: Effectively disable this TAP test. We cannot run PREPARE +# TRANSACTION in utility-mode. We need at least 1 test so create a +# dummy one. 
+#use Test::More tests => 24; +use Test::More tests => 1; +is(-1, -1, "Disable this TAP test"); +exit; my $psql_out = ''; my $psql_rc = ''; diff --git a/src/test/recovery/t/012_subtransactions.pl b/src/test/recovery/t/012_subtransactions.pl index aa84073311b..207fcaa485b 100644 --- a/src/test/recovery/t/012_subtransactions.pl +++ b/src/test/recovery/t/012_subtransactions.pl @@ -7,7 +7,14 @@ use PostgresNode; use TestLib; -use Test::More tests => 12; + +# GPDB: Effectively disable this TAP test. We cannot run PREPARE +# TRANSACTION in utility-mode. We need at least 1 test so create a +# dummy one. +#use Test::More tests => 12; +use Test::More tests => 1; +is(-1, -1, "Disable this TAP test"); +exit; # Setup primary node my $node_primary = get_new_node("primary"); diff --git a/src/test/regress/expected/gp_prepared_xacts.out b/src/test/regress/expected/gp_prepared_xacts.out new file mode 100644 index 00000000000..b3a1b96bc46 --- /dev/null +++ b/src/test/regress/expected/gp_prepared_xacts.out @@ -0,0 +1,7 @@ +-- PREPARE TRANSACTION should not work +BEGIN; +PREPARE TRANSACTION 'foo_prep_xact'; +ERROR: PREPARE TRANSACTION is not yet supported in Greenplum Database +-- PREPARE TRANSACTION should not work in utility-mode connections either +\! 
PGOPTIONS='-c gp_role=utility' psql -X regression -c "BEGIN; PREPARE TRANSACTION 'foo_prep_xact';" +ERROR: PREPARE TRANSACTION is not supported in utility mode diff --git a/src/test/regress/greenplum_schedule b/src/test/regress/greenplum_schedule index 66c5129c356..c5838508e85 100755 --- a/src/test/regress/greenplum_schedule +++ b/src/test/regress/greenplum_schedule @@ -40,7 +40,7 @@ test: instr_in_shmem_setup test: instr_in_shmem test: createdb -test: gp_aggregates gp_aggregates_costs gp_metadata variadic_parameters default_parameters function_extensions spi gp_xml shared_scan update_gp triggers_gp returning_gp resource_queue_with_rule gp_types gp_index cluster_gp combocid_gp gp_sort +test: gp_aggregates gp_aggregates_costs gp_metadata variadic_parameters default_parameters function_extensions spi gp_xml shared_scan update_gp triggers_gp returning_gp resource_queue_with_rule gp_types gp_index cluster_gp combocid_gp gp_sort gp_prepared_xacts test: spi_processed64bit test: gp_tablespace_with_faults # below test(s) inject faults so each of them need to be in a separate group diff --git a/src/test/regress/sql/gp_prepared_xacts.sql b/src/test/regress/sql/gp_prepared_xacts.sql new file mode 100644 index 00000000000..349f5fc6031 --- /dev/null +++ b/src/test/regress/sql/gp_prepared_xacts.sql @@ -0,0 +1,6 @@ +-- PREPARE TRANSACTION should not work +BEGIN; +PREPARE TRANSACTION 'foo_prep_xact'; + +-- PREPARE TRANSACTION should not work in utility-mode connections either +\! PGOPTIONS='-c gp_role=utility' psql -X regression -c "BEGIN; PREPARE TRANSACTION 'foo_prep_xact';" From 1838af88db45e31c50bc5aaf2f4237b0de556c97 Mon Sep 17 00:00:00 2001 From: David Kimura Date: Wed, 9 Mar 2022 23:36:41 +0000 Subject: [PATCH 04/46] Add ORCA GUC to disable NLJ Use optimizer_enable_nljoin to disable all xforms that produce nestloop join alternatives. 
Co-authored-by: Orhan Kislal --- .../gpopt/config/CConfigParamMapping.cpp | 7 +++++ .../libgpopt/include/gpopt/xforms/CXform.h | 3 ++ .../gporca/libgpopt/src/xforms/CXform.cpp | 27 +++++++++++++++++ src/backend/utils/misc/guc_gp.c | 11 +++++++ src/include/utils/guc.h | 1 + src/include/utils/unsync_guc_name.h | 1 + src/test/regress/expected/bfv_joins.out | 30 +++++++++++++++++++ .../regress/expected/bfv_joins_optimizer.out | 29 ++++++++++++++++++ src/test/regress/sql/bfv_joins.sql | 11 +++++++ 9 files changed, 120 insertions(+) diff --git a/src/backend/gpopt/config/CConfigParamMapping.cpp b/src/backend/gpopt/config/CConfigParamMapping.cpp index 7dff57aad09..54bcc0698a3 100644 --- a/src/backend/gpopt/config/CConfigParamMapping.cpp +++ b/src/backend/gpopt/config/CConfigParamMapping.cpp @@ -344,6 +344,13 @@ CConfigParamMapping::PackConfigParamInBitset( } } + if (!optimizer_enable_nljoin) + { + CBitSet *nl_join_bitset = CXform::PbsNLJoinXforms(mp); + traceflag_bitset->Union(nl_join_bitset); + nl_join_bitset->Release(); + } + if (!optimizer_enable_indexjoin) { CBitSet *index_join_bitset = CXform::PbsIndexJoinXforms(mp); diff --git a/src/backend/gporca/libgpopt/include/gpopt/xforms/CXform.h b/src/backend/gporca/libgpopt/include/gpopt/xforms/CXform.h index d8d40bb198c..6be419cc47a 100644 --- a/src/backend/gporca/libgpopt/include/gpopt/xforms/CXform.h +++ b/src/backend/gporca/libgpopt/include/gpopt/xforms/CXform.h @@ -309,6 +309,9 @@ class CXform : public CRefCount, public DbgPrintMixin // equality function over xform ids static BOOL FEqualIds(const CHAR *szIdOne, const CHAR *szIdTwo); + // returns a set containing all xforms related to nl join + // caller takes ownership of the returned set + static CBitSet *PbsNLJoinXforms(CMemoryPool *mp); // returns a set containing all xforms related to index join // caller takes ownership of the returned set diff --git a/src/backend/gporca/libgpopt/src/xforms/CXform.cpp b/src/backend/gporca/libgpopt/src/xforms/CXform.cpp index 
56b9fe74cf6..387c6d03df5 100644 --- a/src/backend/gporca/libgpopt/src/xforms/CXform.cpp +++ b/src/backend/gporca/libgpopt/src/xforms/CXform.cpp @@ -126,6 +126,33 @@ CXform::FEqualIds(const CHAR *szIdOne, const CHAR *szIdTwo) return 0 == clib::Strcmp(szIdOne, szIdTwo); } +//--------------------------------------------------------------------------- +// @function: +// CXform::PbsNLJoinXforms +// +// @doc: +// Returns a set containing all xforms related to nestloop join. +// Caller takes ownership of the returned set +// +//--------------------------------------------------------------------------- +CBitSet * +CXform::PbsNLJoinXforms(CMemoryPool *mp) +{ + CBitSet *pbs = GPOS_NEW(mp) CBitSet(mp, EopttraceSentinel); + (void) pbs->ExchangeSet( + GPOPT_DISABLE_XFORM_TF(CXform::ExfInnerJoin2NLJoin)); + (void) pbs->ExchangeSet( + GPOPT_DISABLE_XFORM_TF(CXform::ExfLeftOuterJoin2NLJoin)); + (void) pbs->ExchangeSet( + GPOPT_DISABLE_XFORM_TF(CXform::ExfLeftSemiJoin2NLJoin)); + (void) pbs->ExchangeSet( + GPOPT_DISABLE_XFORM_TF(CXform::ExfLeftAntiSemiJoin2NLJoin)); + (void) pbs->ExchangeSet( + GPOPT_DISABLE_XFORM_TF(CXform::ExfLeftAntiSemiJoinNotIn2NLJoinNotIn)); + + return pbs; +} + //--------------------------------------------------------------------------- // @function: // CXform::PbsIndexJoinXforms diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index dbf57db0b35..e2d265410c8 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -301,6 +301,7 @@ bool optimizer_xforms[OPTIMIZER_XFORMS_COUNT] = {[0 ... 
OPTIMIZER_XFORMS_COUNT char *optimizer_search_strategy_path = NULL; /* GUCs to tell Optimizer to enable a physical operator */ +bool optimizer_enable_nljoin; bool optimizer_enable_indexjoin; bool optimizer_enable_motions_masteronly_queries; bool optimizer_enable_motions; @@ -2055,6 +2056,16 @@ struct config_bool ConfigureNamesBool_gp[] = true, NULL, NULL, NULL }, + { + {"optimizer_enable_nljoin", PGC_USERSET, DEVELOPER_OPTIONS, + gettext_noop("Enable nested loops join plans in the optimizer."), + NULL, + GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE + }, + &optimizer_enable_nljoin, + true, + NULL, NULL, NULL + }, { {"optimizer_enable_indexjoin", PGC_USERSET, DEVELOPER_OPTIONS, gettext_noop("Enable index nested loops join plans in the optimizer."), diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index b66f0bda825..2142813f3f2 100644 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -488,6 +488,7 @@ extern bool optimizer_xforms[OPTIMIZER_XFORMS_COUNT]; extern char *optimizer_search_strategy_path; /* GUCs to tell Optimizer to enable a physical operator */ +extern bool optimizer_enable_nljoin; extern bool optimizer_enable_indexjoin; extern bool optimizer_enable_motions_masteronly_queries; extern bool optimizer_enable_motions; diff --git a/src/include/utils/unsync_guc_name.h b/src/include/utils/unsync_guc_name.h index 63f8ed02862..9c3c84049d5 100644 --- a/src/include/utils/unsync_guc_name.h +++ b/src/include/utils/unsync_guc_name.h @@ -422,6 +422,7 @@ "optimizer_enable_hashagg", "optimizer_enable_hashjoin", "optimizer_enable_hashjoin_redistribute_broadcast_children", + "optimizer_enable_nljoin", "optimizer_enable_indexjoin", "optimizer_enable_indexscan", "optimizer_enable_indexonlyscan", diff --git a/src/test/regress/expected/bfv_joins.out b/src/test/regress/expected/bfv_joins.out index 60bd9de14b8..944249df281 100644 --- a/src/test/regress/expected/bfv_joins.out +++ b/src/test/regress/expected/bfv_joins.out @@ -3349,6 +3349,36 @@ EXPLAIN SELECT a, b 
FROM gp_float1 JOIN gp_float2 ON a = c AND b = float8 '3.0' Optimizer: Postgres query optimizer (10 rows) +-- Testing optimizer_enable_nljoin +SET optimizer_enable_hashjoin=off; +SET optimizer_enable_nljoin=off; +EXPLAIN SELECT * FROM t1 JOIN t2 ON t1.a=t2.a; + QUERY PLAN +------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=679.75..227587.25 rows=7413210 width=16) + -> Hash Join (cost=679.75..128744.45 rows=2471070 width=16) + Hash Cond: (t1.a = t2.a) + -> Seq Scan on t1 (cost=0.00..321.00 rows=28700 width=8) + -> Hash (cost=321.00..321.00 rows=28700 width=8) + -> Seq Scan on t2 (cost=0.00..321.00 rows=28700 width=8) + Optimizer: Postgres query optimizer +(7 rows) + +SET optimizer_enable_nljoin=on; +EXPLAIN SELECT * FROM t1 JOIN t2 ON t1.a=t2.a; + QUERY PLAN +------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=679.75..227587.25 rows=7413210 width=16) + -> Hash Join (cost=679.75..128744.45 rows=2471070 width=16) + Hash Cond: (t1.a = t2.a) + -> Seq Scan on t1 (cost=0.00..321.00 rows=28700 width=8) + -> Hash (cost=321.00..321.00 rows=28700 width=8) + -> Seq Scan on t2 (cost=0.00..321.00 rows=28700 width=8) + Optimizer: Postgres query optimizer +(7 rows) + +RESET optimizer_enable_hashjoin; +RESET optimizer_enable_nljoin; -- Clean up. None of the objects we create are very interesting to keep around. 
reset search_path; set client_min_messages='warning'; diff --git a/src/test/regress/expected/bfv_joins_optimizer.out b/src/test/regress/expected/bfv_joins_optimizer.out index 9387dfce004..4fc2dd47357 100644 --- a/src/test/regress/expected/bfv_joins_optimizer.out +++ b/src/test/regress/expected/bfv_joins_optimizer.out @@ -3345,6 +3345,35 @@ EXPLAIN SELECT a, b FROM gp_float1 JOIN gp_float2 ON a = c AND b = float8 '3.0' Optimizer: Postgres query optimizer (10 rows) +-- Testing optimizer_enable_nljoin +SET optimizer_enable_hashjoin=off; +SET optimizer_enable_nljoin=off; +EXPLAIN SELECT * FROM t1 JOIN t2 ON t1.a=t2.a; + QUERY PLAN +------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=679.75..227587.25 rows=7413210 width=16) + -> Hash Join (cost=679.75..128744.45 rows=2471070 width=16) + Hash Cond: (t1.a = t2.a) + -> Seq Scan on t1 (cost=0.00..321.00 rows=28700 width=8) + -> Hash (cost=321.00..321.00 rows=28700 width=8) + -> Seq Scan on t2 (cost=0.00..321.00 rows=28700 width=8) + Optimizer: Postgres query optimizer +(7 rows) + +SET optimizer_enable_nljoin=on; +EXPLAIN SELECT * FROM t1 JOIN t2 ON t1.a=t2.a; + QUERY PLAN +----------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.35 rows=1 width=16) + -> Nested Loop (cost=0.00..1324032.35 rows=1 width=16) + Join Filter: (t1.a = t2.a) + -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=8) + -> Seq Scan on t2 (cost=0.00..431.00 rows=1 width=8) + Optimizer: Pivotal Optimizer (GPORCA) +(6 rows) + +RESET optimizer_enable_hashjoin; +RESET optimizer_enable_nljoin; -- Clean up. None of the objects we create are very interesting to keep around. 
reset search_path; set client_min_messages='warning'; diff --git a/src/test/regress/sql/bfv_joins.sql b/src/test/regress/sql/bfv_joins.sql index 3e81ab32c1e..d85fa11ee26 100644 --- a/src/test/regress/sql/bfv_joins.sql +++ b/src/test/regress/sql/bfv_joins.sql @@ -347,6 +347,17 @@ EXPLAIN SELECT a, b FROM gp_float1 JOIN gp_float2 ON a = c AND b = float8 '3.0'; -- redistribute based on the compatible constant. EXPLAIN SELECT a, b FROM gp_float1 JOIN gp_float2 ON a = c AND b = float8 '3.0' AND b = float4 '3.0'; +-- Testing optimizer_enable_nljoin +SET optimizer_enable_hashjoin=off; +SET optimizer_enable_nljoin=off; +EXPLAIN SELECT * FROM t1 JOIN t2 ON t1.a=t2.a; + +SET optimizer_enable_nljoin=on; +EXPLAIN SELECT * FROM t1 JOIN t2 ON t1.a=t2.a; + +RESET optimizer_enable_hashjoin; +RESET optimizer_enable_nljoin; + -- Clean up. None of the objects we create are very interesting to keep around. reset search_path; set client_min_messages='warning'; From 7794a41063ee738967f87a4d1775018e40b730b2 Mon Sep 17 00:00:00 2001 From: Adam Lee Date: Wed, 16 Mar 2022 02:25:09 +0000 Subject: [PATCH 05/46] Add GUC gp_log_endpoints to print endpoints information to server log It's disabled by default. 
--- src/backend/cdb/endpoint/cdbendpoint.c | 20 +++++++++---------- .../cdb/endpoint/cdbendpointretrieve.c | 17 ++++++++-------- src/backend/utils/misc/guc_gp.c | 13 ++++++++++++ src/include/utils/guc.h | 2 ++ src/include/utils/sync_guc_name.h | 1 + 5 files changed, 35 insertions(+), 18 deletions(-) diff --git a/src/backend/cdb/endpoint/cdbendpoint.c b/src/backend/cdb/endpoint/cdbendpoint.c index 236382e6788..c03f3674fd8 100644 --- a/src/backend/cdb/endpoint/cdbendpoint.c +++ b/src/backend/cdb/endpoint/cdbendpoint.c @@ -75,6 +75,7 @@ #include "utils/backend_cancel.h" #include "utils/builtins.h" #include "utils/faultinjector.h" +#include "utils/guc.h" #include "cdb/cdbdisp_query.h" #include "cdb/cdbdispatchresult.h" #include "cdb/cdbendpoint.h" @@ -496,8 +497,7 @@ create_and_connect_mq(TupleDesc tupleDesc, dsm_segment **mqSeg /* out */ , char *tupdescSpace; TupleDescNode *node = makeNode(TupleDescNode); - elog(DEBUG3, - "CDB_ENDPOINTS: create and setup the shared memory message queue"); + elogif(gp_log_endpoints, LOG, "CDB_ENDPOINT: create and setup the shared memory message queue"); /* Serialize TupleDesc */ node->natts = tupleDesc->natts; @@ -556,7 +556,7 @@ setup_endpoint_token_entry() LWLockAcquire(ParallelCursorEndpointLock, LW_EXCLUSIVE); infoEntry = (EndpointTokenEntry *) hash_search(EndpointTokenHash, &tag, HASH_ENTER, &found); - elog(DEBUG3, "CDB_ENDPOINT: Finish endpoint init. Found EndpointTokenEntry? %d", found); + elogif(gp_log_endpoints, LOG, "CDB_ENDPOINT: Finish endpoint init. Found EndpointTokenEntry? 
%d", found); /* * Save the token if it is the first time we create endpoint in current @@ -626,7 +626,7 @@ wait_receiver(void) { EndpointExecState * state = CurrentEndpointExecState; - elog(DEBUG3, "CDB_ENDPOINTS: wait receiver"); + elogif(gp_log_endpoints, LOG, "CDB_ENDPOINT: wait receiver"); while (true) { int wr = 0; @@ -636,7 +636,7 @@ wait_receiver(void) if (QueryFinishPending) break; - elog(DEBUG5, "CDB_ENDPOINT: sender wait latch in wait_receiver()"); + elogif(gp_log_endpoints, LOG, "CDB_ENDPOINT: sender wait latch in wait_receiver()"); wr = WaitLatchOrSocket(&state->endpoint->ackDone, WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_TIMEOUT | WL_SOCKET_READABLE, MyProcPort->sock, @@ -664,7 +664,7 @@ wait_receiver(void) if (wr & WL_LATCH_SET) { - elog(DEBUG3, "CDB_ENDPOINT:sender reset latch in wait_receiver()"); + elogif(gp_log_endpoints, LOG, "CDB_ENDPOINT: sender reset latch in wait_receiver()"); ResetLatch(&state->endpoint->ackDone); break; } @@ -679,7 +679,7 @@ wait_receiver(void) static void detach_mq(dsm_segment *dsmSeg) { - elog(DEBUG3, "CDB_ENDPOINT: Sender message queue detaching. '%p'", + elogif(gp_log_endpoints, LOG, "CDB_ENDPOINT: sender message queue detaching. '%p'", (void *) dsmSeg); Assert(dsmSeg); @@ -699,7 +699,7 @@ unset_endpoint_sender_pid(Endpoint *endpoint) Assert(endpoint); Assert(!endpoint->empty); - elog(DEBUG3, "CDB_ENDPOINT: unset endpoint sender pid"); + elogif(gp_log_endpoints, LOG, "CDB_ENDPOINT: unset endpoint sender pid"); /* * Only the endpoint QE/entry DB execute this unset sender pid function. 
@@ -785,7 +785,7 @@ wait_parallel_retrieve_close(void) if (QueryFinishPending || QueryCancelPending) break; - elog(DEBUG3, "CDB_ENDPOINT: wait for parallel retrieve cursor close"); + elogif(gp_log_endpoints, LOG, "CDB_ENDPOINT: wait for parallel retrieve cursor close"); wr = WaitLatchOrSocket(&MyProc->procLatch, WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_TIMEOUT | WL_SOCKET_READABLE, MyProcPort->sock, @@ -829,7 +829,7 @@ free_endpoint(Endpoint *endpoint) Assert(endpoint); Assert(!endpoint->empty); - elog(DEBUG3, "CDB_ENDPOINTS: Free endpoint '%s'.", endpoint->name); + elogif(gp_log_endpoints, LOG, "CDB_ENDPOINT: free endpoint '%s'", endpoint->name); endpoint->databaseID = InvalidOid; endpoint->mqDsmHandle = DSM_HANDLE_INVALID; diff --git a/src/backend/cdb/endpoint/cdbendpointretrieve.c b/src/backend/cdb/endpoint/cdbendpointretrieve.c index a51aa678af6..349475f07be 100644 --- a/src/backend/cdb/endpoint/cdbendpointretrieve.c +++ b/src/backend/cdb/endpoint/cdbendpointretrieve.c @@ -39,6 +39,7 @@ #include "utils/dynahash.h" #include "utils/elog.h" #include "utils/faultinjector.h" +#include "utils/guc.h" #include "cdbendpoint_private.h" #include "cdb/cdbendpoint.h" #include "cdb/cdbsrlz.h" @@ -433,7 +434,7 @@ attach_receiver_mq(dsm_handle dsmHandle) */ oldcontext = MemoryContextSwitchTo(TopMemoryContext); - elog(DEBUG3, "CDB_ENDPOINTS: init message queue conn for receiver"); + elogif(gp_log_endpoints, LOG, "CDB_ENDPOINT: init message queue conn for receiver"); entry->mqSeg = dsm_attach(dsmHandle); if (entry->mqSeg == NULL) @@ -538,8 +539,8 @@ retrieve_next_tuple() * at the first time to retrieve data, tell sender not to wait at * wait_receiver() */ - elog(DEBUG3, "CDB_ENDPOINT: receiver notifies sender in " - "retrieve_next_tuple() when retrieving data for the first time"); + elogif(gp_log_endpoints, LOG, "CDB_ENDPOINT: receiver notifies sender in " + "retrieve_next_tuple() when retrieving data for the first time"); notify_sender(false); } @@ -619,8 +620,8 @@ 
finish_retrieve(bool resetPID) * however, can not get endpoint through get_endpoint_from_retrieve_exec_entry. */ LWLockRelease(ParallelCursorEndpointLock); - elog(DEBUG3, "the Endpoint entry %s has already been cleaned, \ - remove from RetrieveCtl.RetrieveExecEntryHTB hash table.", entry->endpointName); + elogif(gp_log_endpoints, LOG, "the Endpoint entry %s has already been cleaned, \ + remove from RetrieveCtl.RetrieveExecEntryHTB hash table", entry->endpointName); hash_search(RetrieveCtl.RetrieveExecEntryHTB, entry->endpointName, HASH_REMOVE, NULL); RetrieveCtl.current_entry = NULL; return; @@ -679,7 +680,7 @@ retrieve_cancel_action(RetrieveExecEntry * entry, char *msg) endpoint->state = ENDPOINTSTATE_RELEASED; if (endpoint->senderPid != InvalidPid) { - elog(DEBUG3, "CDB_ENDPOINT: signal sender to abort"); + elogif(gp_log_endpoints, LOG, "CDB_ENDPOINT: signal sender to abort"); SetBackendCancelMessage(endpoint->senderPid, msg); kill(endpoint->senderPid, SIGINT); } @@ -728,7 +729,7 @@ retrieve_exit_callback(int code, Datum arg) RetrieveExecEntry *entry; HTAB *entryHTB = RetrieveCtl.RetrieveExecEntryHTB; - elog(DEBUG3, "CDB_ENDPOINTS: retrieve exit callback"); + elogif(gp_log_endpoints, LOG, "CDB_ENDPOINT: retrieve exit callback"); /* Nothing to do if the hashtable is not ready. 
*/ if (entryHTB == NULL) @@ -768,7 +769,7 @@ retrieve_xact_callback(XactEvent ev, void *arg pg_attribute_unused()) { if (ev == XACT_EVENT_ABORT) { - elog(DEBUG3, "CDB_ENDPOINT: retrieve xact abort callback"); + elogif(gp_log_endpoints, LOG, "CDB_ENDPOINT: retrieve xact abort callback"); if (RetrieveCtl.sessionID != InvalidEndpointSessionId && RetrieveCtl.current_entry) { diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index e2d265410c8..2393346516e 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -436,6 +436,8 @@ int gp_predicate_pushdown_sample_rows; bool enable_offload_entry_to_qe = false; bool enable_answer_query_using_materialized_views = false; +bool gp_log_endpoints = false; + static const struct config_enum_entry gp_log_format_options[] = { {"text", 0}, {"csv", 1}, @@ -2862,6 +2864,17 @@ struct config_bool ConfigureNamesBool_gp[] = false, NULL, NULL }, + { + {"gp_log_endpoints", PGC_SUSET, LOGGING_WHAT, + gettext_noop("Prints endpoints information to server log."), + NULL, + GUC_SUPERUSER_ONLY | GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE + }, + &gp_log_endpoints, + false, + NULL, NULL, NULL + }, + { {"optimizer_enable_eageragg", PGC_USERSET, DEVELOPER_OPTIONS, gettext_noop("Enable Eager Agg transform for pushing aggregate below an innerjoin."), diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index 2142813f3f2..4cd336f4909 100644 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -622,6 +622,8 @@ extern bool gp_enable_global_deadlock_detector; extern bool gp_enable_predicate_pushdown; extern int gp_predicate_pushdown_sample_rows; +extern bool gp_log_endpoints; + typedef enum { INDEX_CHECK_NONE, diff --git a/src/include/utils/sync_guc_name.h b/src/include/utils/sync_guc_name.h index 14c5125a2d2..02419fc8c22 100644 --- a/src/include/utils/sync_guc_name.h +++ b/src/include/utils/sync_guc_name.h @@ -57,6 +57,7 @@ "gp_interconnect_timer_period", 
"gp_interconnect_transmit_timeout", "gp_interconnect_type", + "gp_log_endpoints", "gp_log_interconnect", "gp_log_resgroup_memory", "gp_log_resqueue_memory", From 4f5ab2166b15403e066afb94b6427b98786f7cee Mon Sep 17 00:00:00 2001 From: Divyesh Vanjare Date: Thu, 31 Mar 2022 14:38:35 -0700 Subject: [PATCH 06/46] Refactor IC motion socket setup - Assert that interconnect_address is always set, in order to get rid of conditional code - Remove AI_PASSIVE flag from socket setup functions (it was being ignored anyway as we always pass a unicast address to getaddrinfo) - Comment cleanup - Added a regress test for checking motion socket creation Co-authored-by: Soumyadeep Chakraborty --- contrib/interconnect/tcp/ic_tcp.c | 26 +++----- contrib/interconnect/udp/ic_udpifc.c | 27 +++----- src/backend/cdb/cdbutil.c | 4 +- src/backend/cdb/dispatcher/cdbgang.c | 8 +-- src/test/regress/expected/motion_socket.out | 71 +++++++++++++++++++++ src/test/regress/greenplum_schedule | 3 + src/test/regress/sql/motion_socket.sql | 62 ++++++++++++++++++ 7 files changed, 155 insertions(+), 46 deletions(-) create mode 100644 src/test/regress/expected/motion_socket.out create mode 100644 src/test/regress/sql/motion_socket.sql diff --git a/contrib/interconnect/tcp/ic_tcp.c b/contrib/interconnect/tcp/ic_tcp.c index 82475520b5f..3306f25467e 100644 --- a/contrib/interconnect/tcp/ic_tcp.c +++ b/contrib/interconnect/tcp/ic_tcp.c @@ -140,29 +140,19 @@ setupTCPListeningSocket(int backlog, int *listenerSocketFd, int32 *listenerPort) memset(&hints, 0, sizeof(struct addrinfo)); hints.ai_family = AF_UNSPEC; /* Allow IPv4 or IPv6 */ hints.ai_socktype = SOCK_STREAM; /* Two-way, out of band connection */ - hints.ai_flags = AI_PASSIVE; /* For wildcard IP address */ hints.ai_protocol = 0; /* Any protocol - TCP implied for network use * due to SOCK_STREAM */ /* - * We set interconnect_address on the primary to the local address of the - * connection from QD to the primary, which is the primary's ADDRESS from - * 
gp_segment_configuration, used for interconnection. However it's wrong - * on the master. Because the connection from the client to the master may - * have different IP addresses as its destination, which is very likely - * not the master's ADDRESS in gp_segment_configuration. + * Restrict what IP address we will listen on to just the one that was + * used to create this QE session. */ - if (interconnect_address) - { - /* - * Restrict what IP address we will listen on to just the one that was - * used to create this QE session. - */ - hints.ai_flags |= AI_NUMERICHOST; - ereport(DEBUG1, (errmsg("binding to %s only", interconnect_address))); - if (gp_log_interconnect >= GPVARS_VERBOSITY_DEBUG) - ereport(DEBUG4, (errmsg("binding listener %s", interconnect_address))); - } + Assert(interconnect_address && strlen(interconnect_address) > 0); + hints.ai_flags |= AI_NUMERICHOST; + if (gp_log_interconnect >= GPVARS_VERBOSITY_DEBUG) + ereport(DEBUG1, + (errmsg("getaddrinfo called with interconnect_address %s", + interconnect_address))); s = getaddrinfo(interconnect_address, service, &hints, &addrs); if (s != 0) diff --git a/contrib/interconnect/udp/ic_udpifc.c b/contrib/interconnect/udp/ic_udpifc.c index b6a415c8469..4d4a3ca8f82 100644 --- a/contrib/interconnect/udp/ic_udpifc.c +++ b/contrib/interconnect/udp/ic_udpifc.c @@ -1193,7 +1193,6 @@ setupUDPListeningSocket(int *listenerSocketFd, int32 *listenerPort, int *txFamil memset(&hints, 0, sizeof(struct addrinfo)); hints.ai_family = AF_UNSPEC; /* Allow IPv4 or IPv6 */ hints.ai_socktype = SOCK_DGRAM; /* Datagram socket */ - hints.ai_flags = AI_PASSIVE; /* For wildcard IP address */ hints.ai_protocol = 0; /* Any protocol - UDP implied for network use due to SOCK_DGRAM */ #ifdef USE_ASSERT_CHECKING @@ -1203,24 +1202,16 @@ setupUDPListeningSocket(int *listenerSocketFd, int32 *listenerPort, int *txFamil fun = "getaddrinfo"; /* - * We set interconnect_address on the primary to the local address of the connection from QD - * to the 
primary, which is the primary's ADDRESS from gp_segment_configuration, - * used for interconnection. - * However it's wrong on the master. Because the connection from the client to the master may - * have different IP addresses as its destination, which is very likely not the master's - * ADDRESS in gp_segment_configuration. + * Restrict what IP address we will listen on to just the one that was + * used to create this QE session. */ - if (interconnect_address) - { - /* - * Restrict what IP address we will listen on to just the one that was - * used to create this QE session. - */ - hints.ai_flags |= AI_NUMERICHOST; - ereport(DEBUG1, (errmsg("binding to %s only", interconnect_address))); - if (gp_log_interconnect >= GPVARS_VERBOSITY_DEBUG) - ereport(DEBUG4, (errmsg("binding address %s", interconnect_address))); - } + Assert(interconnect_address && strlen(interconnect_address) > 0); + hints.ai_flags |= AI_NUMERICHOST; + if (gp_log_interconnect >= GPVARS_VERBOSITY_DEBUG) + ereport(DEBUG1, + (errmsg("getaddrinfo called with interconnect_address %s", + interconnect_address))); + s = getaddrinfo(interconnect_address, service, &hints, &addrs); if (s != 0) elog(ERROR, "getaddrinfo says %s", gai_strerror(s)); diff --git a/src/backend/cdb/cdbutil.c b/src/backend/cdb/cdbutil.c index 69d6ffda974..264bdc8c284 100644 --- a/src/backend/cdb/cdbutil.c +++ b/src/backend/cdb/cdbutil.c @@ -1024,7 +1024,6 @@ ensureInterconnectAddress(void) if (GpIdentity.segindex >= 0) { Assert(Gp_role == GP_ROLE_EXECUTE); - Assert(MyProcPort != NULL); Assert(MyProcPort->laddr.addr.ss_family == AF_INET || MyProcPort->laddr.addr.ss_family == AF_INET6); /* @@ -1058,8 +1057,7 @@ ensureInterconnectAddress(void) */ interconnect_address = qdHostname; } - else - Assert(false); + Assert(interconnect_address && strlen(interconnect_address) > 0); } /* * performs all necessary setup required for Cloudberry Database mode. 
diff --git a/src/backend/cdb/dispatcher/cdbgang.c b/src/backend/cdb/dispatcher/cdbgang.c index 966c293ad7c..85e29af627f 100644 --- a/src/backend/cdb/dispatcher/cdbgang.c +++ b/src/backend/cdb/dispatcher/cdbgang.c @@ -665,13 +665,7 @@ getCdbProcessesForQD(int isPrimary) /* * Set QD listener address to the ADDRESS of the master, so the motions that connect to - * the master knows what the interconnect address of the peer is. `adjustMasterRouting()` - * is not necessary, and it could be wrong if the QD/QE on the master binds a single IP - * address for interconnection instead of the wildcard address. Binding the wildcard address - * for interconnection has some flaws: - * 1. All the QD/QE in the same node share the same port space(for a same AF_INET/AF_INET6), - * which contributes to run out of port. - * 2. When the segments have their own ADDRESS, the connection address could be confusing. + * the master knows what the interconnect address of the peer is. */ proc->listenerAddr = pstrdup(qdinfo->config->hostip); proc->listenerPort = CurrentMotionIPCLayer->GetListenPort(); diff --git a/src/test/regress/expected/motion_socket.out b/src/test/regress/expected/motion_socket.out new file mode 100644 index 00000000000..cbb6ffeb965 --- /dev/null +++ b/src/test/regress/expected/motion_socket.out @@ -0,0 +1,71 @@ +-- The following test checks if the correct number and type of sockets are +-- created for motion connections both on QD and QE backends for the same +-- gp_session_id. Additionally we check if the source address used for creating +-- the motion sockets is equal to gp_segment_configuration.address. 
+-- start_matchignore +-- m/^INFO: Checking postgres backend postgres:*/ +-- end_matchignore +CREATE FUNCTION check_motion_sockets() + RETURNS VOID as $$ +import psutil, socket + +# Create a temporary table to create a gang +plpy.execute("CREATE TEMP TABLE motion_socket_force_create_gang(i int);") + +# We expect different number of sockets to be created for different +# interconnect types +# UDP: See calls to setupUDPListeningSocket in InitMotionUDPIFC +# TCP/PROXY: See call to setupTCPListeningSocket in InitMotionTCP +res = plpy.execute("SELECT current_setting('gp_interconnect_type');", 1) +ic_type = res[0]['current_setting'] +if ic_type in ['tcp', 'proxy']: + expected_socket_count_per_segment = 1 + expected_socket_kind = socket.SocketKind.SOCK_STREAM +elif ic_type=='udpifc': + expected_socket_count_per_segment = 2 + expected_socket_kind = socket.SocketKind.SOCK_DGRAM +else: + plpy.error('Unrecognized gp_interconnect_type {}.'.format(ic_type)) + +# Since this test is run on a single physical host we assume that all segments +# have the same gp_segment_configuration.address +res = plpy.execute("SELECT address FROM gp_segment_configuration;", 1) +hostip = socket.gethostbyname(res[0]['address']) + +res = plpy.execute("SELECT current_setting('gp_session_id');", 1) +qd_backend_conn_id = res[0]['current_setting'] + +for process in psutil.process_iter(): + # We iterate through all backends related to connection id + # of current session + # Exclude zombies to avoid psutil.ZombieProcess exceptions + # on calling process.cmdline() + if process.name() == 'postgres' and process.status() != psutil.STATUS_ZOMBIE: + if ' con' + qd_backend_conn_id + ' ' in process.cmdline()[0]: + motion_socket_count = 0 + plpy.info('Checking postgres backend {}'.format(process.cmdline()[0])) + for conn in process.connections(): + if conn.type == expected_socket_kind and conn.raddr == () \ + and conn.laddr.ip == hostip: + motion_socket_count += 1 + + if motion_socket_count != 
expected_socket_count_per_segment: + plpy.error('Expected {} motion sockets but found {}. '\ + 'For backend process {}. connections= {}'\ + .format(expected_socket_count_per_segment, process,\ + motion_socket_count, process.connections())) + + +$$ LANGUAGE plpython3u EXECUTE ON MASTER; +SELECT check_motion_sockets(); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +INFO: Checking postgres backend postgres: 7000, vanjared regression 127.0.0.1(59545) con1812 cmd2 SELECT +INFO: Checking postgres backend postgres: 7002, vanjared regression 127.0.0.1(59550) con1812 seg0 idle in transaction +INFO: Checking postgres backend postgres: 7003, vanjared regression 127.0.0.1(59551) con1812 seg1 idle in transaction +INFO: Checking postgres backend postgres: 7004, vanjared regression 127.0.0.1(59552) con1812 seg2 idle in transaction + check_motion_sockets +---------------------- + +(1 row) + diff --git a/src/test/regress/greenplum_schedule b/src/test/regress/greenplum_schedule index c5838508e85..a6e4f8bc62d 100755 --- a/src/test/regress/greenplum_schedule +++ b/src/test/regress/greenplum_schedule @@ -321,4 +321,7 @@ test: am_encoding # tests of directory table test: directory_table +# test if motion sockets are created with the gp_segment_configuration.address +test: motion_socket + # end of tests diff --git a/src/test/regress/sql/motion_socket.sql b/src/test/regress/sql/motion_socket.sql new file mode 100644 index 00000000000..c3c396ad41e --- /dev/null +++ b/src/test/regress/sql/motion_socket.sql @@ -0,0 +1,62 @@ +-- The following test checks if the correct number and type of sockets are +-- created for motion connections both on QD and QE backends for the same +-- gp_session_id. 
Additionally we check if the source address used for creating +-- the motion sockets is equal to gp_segment_configuration.address. + + +-- start_matchignore +-- m/^INFO: Checking postgres backend postgres:*/ +-- end_matchignore +CREATE FUNCTION check_motion_sockets() + RETURNS VOID as $$ +import psutil, socket + +# Create a temporary table to create a gang +plpy.execute("CREATE TEMP TABLE motion_socket_force_create_gang(i int);") + +# We expect different number of sockets to be created for different +# interconnect types +# UDP: See calls to setupUDPListeningSocket in InitMotionUDPIFC +# TCP/PROXY: See call to setupTCPListeningSocket in InitMotionTCP +res = plpy.execute("SELECT current_setting('gp_interconnect_type');", 1) +ic_type = res[0]['current_setting'] +if ic_type in ['tcp', 'proxy']: + expected_socket_count_per_segment = 1 + expected_socket_kind = socket.SocketKind.SOCK_STREAM +elif ic_type=='udpifc': + expected_socket_count_per_segment = 2 + expected_socket_kind = socket.SocketKind.SOCK_DGRAM +else: + plpy.error('Unrecognized gp_interconnect_type {}.'.format(ic_type)) + +# Since this test is run on a single physical host we assume that all segments +# have the same gp_segment_configuration.address +res = plpy.execute("SELECT address FROM gp_segment_configuration;", 1) +hostip = socket.gethostbyname(res[0]['address']) + +res = plpy.execute("SELECT current_setting('gp_session_id');", 1) +qd_backend_conn_id = res[0]['current_setting'] + +for process in psutil.process_iter(): + # We iterate through all backends related to connection id + # of current session + # Exclude zombies to avoid psutil.ZombieProcess exceptions + # on calling process.cmdline() + if process.name() == 'postgres' and process.status() != psutil.STATUS_ZOMBIE: + if ' con' + qd_backend_conn_id + ' ' in process.cmdline()[0]: + motion_socket_count = 0 + plpy.info('Checking postgres backend {}'.format(process.cmdline()[0])) + for conn in process.connections(): + if conn.type == 
expected_socket_kind and conn.raddr == () \
+ and conn.laddr.ip == hostip:
+ motion_socket_count += 1
+
+ if motion_socket_count != expected_socket_count_per_segment:
+ plpy.error('Expected {} motion sockets but found {}. '\
+ 'For backend process {}. connections= {}'\
+ .format(expected_socket_count_per_segment, process,\
+ motion_socket_count, process.connections()))
+
+
+$$ LANGUAGE plpython3u EXECUTE ON MASTER;
+SELECT check_motion_sockets();
From 77f6c385628a2db3e336727dcaa9752a58ed2728 Mon Sep 17 00:00:00 2001
From: Huansong Fu
Date: Tue, 22 Mar 2022 19:46:39 -0700
Subject: [PATCH 07/46] Fix FTS prematurely failing primary over due to long
 reset period

Add a new state and corresponding error message for RESET and let FTS
ignore it when it detects primary down.

Detailed rationale behind the change: this RESET period is when a primary
crashes but has not yet started recovery. Normally this is a short period
but we've seen cases where the primary's postmaster waits a long time
(40 to 50 seconds) for backends to exit. Because previously PM would send
"in recovery" response to FTS during that time, and FTS sensed no recovery
progress, it would panic and issue failover. Now we just let FTS ignore
that state. We could add a new FTS timeout to guard against primary being
stuck waiting in that state, but we think it should be very rare so we
aren't doing that until we see a need. There's a 5-second timeout
`SIGKILL_CHILDREN_AFTER_SECS` on the PM side, after which PM will send
`SIGKILL` to its children.

Also make the new mode be respected by certain retry mechanisms, such as
in the isolation2 framework and the segment_failure_due_to_recovery().
--- src/backend/cdb/dispatcher/cdbgang.c | 4 + src/backend/cdb/dispatcher/cdbgang_async.c | 6 +- src/backend/fts/ftsprobe.c | 47 ++++++++-- src/backend/postmaster/postmaster.c | 26 +++++- src/include/libpq/libpq-be.h | 3 +- src/include/postmaster/ftsprobe.h | 11 ++- src/include/postmaster/postmaster.h | 1 + .../isolation2/expected/fts_segment_reset.out | 93 +++++++++++++++++++ src/test/isolation2/isolation2_schedule | 1 + src/test/isolation2/sql/fts_segment_reset.sql | 59 ++++++++++++ src/test/isolation2/sql_isolation_testcase.py | 1 + src/test/regress/output/dispatch.source | 2 +- 12 files changed, 238 insertions(+), 16 deletions(-) create mode 100644 src/test/isolation2/expected/fts_segment_reset.out create mode 100644 src/test/isolation2/sql/fts_segment_reset.sql diff --git a/src/backend/cdb/dispatcher/cdbgang.c b/src/backend/cdb/dispatcher/cdbgang.c index 85e29af627f..c6c4cd64312 100644 --- a/src/backend/cdb/dispatcher/cdbgang.c +++ b/src/backend/cdb/dispatcher/cdbgang.c @@ -181,6 +181,10 @@ segment_failure_due_to_recovery(const char *error_message) ptr = strstr(error_message, fatal); if ((ptr != NULL) && ptr[fatal_len] == ':') { + if (strstr(error_message, _(POSTMASTER_IN_RESET_MSG))) + { + return true; + } if (strstr(error_message, _(POSTMASTER_IN_STARTUP_MSG))) { return true; diff --git a/src/backend/cdb/dispatcher/cdbgang_async.c b/src/backend/cdb/dispatcher/cdbgang_async.c index 5b96dfbf04e..0d65fa58142 100644 --- a/src/backend/cdb/dispatcher/cdbgang_async.c +++ b/src/backend/cdb/dispatcher/cdbgang_async.c @@ -237,7 +237,7 @@ cdbgang_createGang_async(List *segments, SegmentType segmentType) { in_recovery_mode_count++; connStatusDone[i] = true; - elog(LOG, "segment is in recovery mode (%s)", segdbDesc->whoami); + elog(LOG, "segment is in reset/recovery mode (%s)", segdbDesc->whoami); } else { @@ -309,7 +309,7 @@ cdbgang_createGang_async(List *segments, SegmentType segmentType) ELOG_DISPATCHER_DEBUG("createGang: %d processes requested; %d successful 
connections %d in recovery", size, successful_connections, in_recovery_mode_count); - /* some segments are in recovery mode */ + /* some segments are in reset/recovery mode */ if (successful_connections != size) { Assert(successful_connections + in_recovery_mode_count == size); @@ -318,7 +318,7 @@ cdbgang_createGang_async(List *segments, SegmentType segmentType) create_gang_retry_counter++ >= gp_gang_creation_retry_count) ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR), errmsg("failed to acquire resources on one or more segments"), - errdetail("Segments are in recovery mode."))); + errdetail("Segments are in reset/recovery mode."))); ELOG_DISPATCHER_DEBUG("createGang: gang creation failed, but retryable."); diff --git a/src/backend/fts/ftsprobe.c b/src/backend/fts/ftsprobe.c index b1abbda2ae7..36c08853e29 100644 --- a/src/backend/fts/ftsprobe.c +++ b/src/backend/fts/ftsprobe.c @@ -199,8 +199,15 @@ ftsConnectStart(fts_segment_info *ftsInfo) return true; } +/* + * Check if the primary segment is restarting normally by examing the PQ error message. + * It could be that they are in RESET (waiting for the children to exit) or making + * progress in RECOVERY. Note there is no good source of RESET progress indications + * that we could check, so we simply always allow it. Normally RESET should be fast + * and there's a timeout in postmaster to guard against long wait. 
+ */ static void -checkIfFailedDueToRecoveryInProgress(fts_segment_info *ftsInfo) +checkIfFailedDueToNormalRestart(fts_segment_info *ftsInfo) { if (strstr(PQerrorMessage(ftsInfo->conn), _(POSTMASTER_IN_RECOVERY_MSG)) || strstr(PQerrorMessage(ftsInfo->conn), _(POSTMASTER_IN_STARTUP_MSG))) @@ -241,6 +248,7 @@ checkIfFailedDueToRecoveryInProgress(fts_segment_info *ftsInfo) */ if (tmpptr <= ftsInfo->xlogrecptr) { + ftsInfo->restart_state = PM_IN_RECOVERY_NOT_MAKING_PROGRESS; elog(LOG, "FTS: detected segment is in recovery mode and not making progress (content=%d) " "primary dbid=%d, mirror dbid=%d", ftsInfo->primary_cdbinfo->config->segindex, @@ -249,7 +257,7 @@ checkIfFailedDueToRecoveryInProgress(fts_segment_info *ftsInfo) } else { - ftsInfo->recovery_making_progress = true; + ftsInfo->restart_state = PM_IN_RECOVERY_MAKING_PROGRESS; ftsInfo->xlogrecptr = tmpptr; elogif(gp_log_fts >= GPVARS_VERBOSITY_VERBOSE, LOG, "FTS: detected segment is in recovery mode replayed (%X/%X) (content=%d) " @@ -261,6 +269,15 @@ checkIfFailedDueToRecoveryInProgress(fts_segment_info *ftsInfo) ftsInfo->mirror_cdbinfo->config->dbid); } } + else if (strstr(PQerrorMessage(ftsInfo->conn), _(POSTMASTER_IN_RESET_MSG))) + { + ftsInfo->restart_state = PM_IN_RESETTING; + elog(LOG, "FTS: detected segment is in RESET state (content=%d) " + "primary dbid=%d, mirror dbid=%d", + ftsInfo->primary_cdbinfo->config->segindex, + ftsInfo->primary_cdbinfo->config->dbid, + ftsInfo->mirror_cdbinfo->config->dbid); + } } /* @@ -296,10 +313,11 @@ ftsConnect(fts_context *context) case FTS_SYNCREP_OFF_SEGMENT: case FTS_PROMOTE_SEGMENT: /* - * We always default to false. If connect fails due to recovery in progress - * this variable will be set based on LSN value in error message. + * We always default to PM_NOT_IN_RESTART. If connect fails, we then check + * the primary's restarting state, so we can skip promoting mirror if it's in + * PM_IN_RESETTING or PM_IN_RECOVERY_MAKING_PROGRESS. 
*/ - ftsInfo->recovery_making_progress = false; + ftsInfo->restart_state = PM_NOT_IN_RESTART; if (ftsInfo->conn == NULL) { AssertImply(ftsInfo->retry_count > 0, @@ -348,7 +366,7 @@ ftsConnect(fts_context *context) case PGRES_POLLING_FAILED: ftsInfo->state = nextFailedState(ftsInfo->state); - checkIfFailedDueToRecoveryInProgress(ftsInfo); + checkIfFailedDueToNormalRestart(ftsInfo); elog(LOG, "FTS: cannot establish libpq connection " "(content=%d, dbid=%d): %s, retry_count=%d", ftsInfo->primary_cdbinfo->config->segindex, @@ -1060,8 +1078,19 @@ processResponse(fts_context *context) case FTS_PROBE_FAILED: /* Primary is down */ - /* If primary is in recovery, do not mark it down and promote mirror */ - if (ftsInfo->recovery_making_progress) + /* If primary is in resetting or making progress in recovery, do not mark it down and promote mirror */ + if (ftsInfo->restart_state == PM_IN_RESETTING) + { + Assert(strstr(PQerrorMessage(ftsInfo->conn), _(POSTMASTER_IN_RESET_MSG))); + elogif(gp_log_fts >= GPVARS_VERBOSITY_VERBOSE, LOG, + "FTS: detected segment is in resetting mode " + "(content=%d) primary dbid=%d, mirror dbid=%d", + primary->config->segindex, primary->config->dbid, mirror->config->dbid); + + ftsInfo->state = FTS_RESPONSE_PROCESSED; + break; + } + else if (ftsInfo->restart_state == PM_IN_RECOVERY_MAKING_PROGRESS) { Assert(strstr(PQerrorMessage(ftsInfo->conn), _(POSTMASTER_IN_RECOVERY_MSG)) || strstr(PQerrorMessage(ftsInfo->conn), _(POSTMASTER_IN_STARTUP_MSG))); @@ -1235,7 +1264,7 @@ FtsWalRepInitProbeContext(CdbComponentDatabases *cdbs, fts_context *context) ftsInfo->result.isRoleMirror = false; ftsInfo->result.dbid = primary->config->dbid; ftsInfo->state = FTS_PROBE_SEGMENT; - ftsInfo->recovery_making_progress = false; + ftsInfo->restart_state = PM_NOT_IN_RESTART; ftsInfo->xlogrecptr = InvalidXLogRecPtr; ftsInfo->primary_cdbinfo = primary; diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 15b986e04af..0d43872ebae 100644 
--- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -2149,6 +2149,19 @@ ServerLoop(void) AbortStartTime != 0 && (now - AbortStartTime) >= SIGKILL_CHILDREN_AFTER_SECS) { +#ifdef FAULT_INJECTOR + if (SIMPLE_FAULT_INJECTOR("postmaster_server_loop_no_sigkill") == FaultInjectorTypeSkip) + { + /* + * This prevents sending SIGKILL to child processes for testing purpose. + * Since each time hitting this fault will print a log, let's wait 0.1s just + * not to overwhelm the logs. Reaching here means we are shutting down so + * making postmaster slower should be OK (only for testing anyway). + */ + pg_usleep(100000L); + continue; + } +#endif /* We were gentle with them before. Not anymore */ ereport(LOG, (errmsg("issuing SIGKILL to recalcitrant children"))); @@ -2763,6 +2776,11 @@ ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done) errdetail(POSTMASTER_IN_RECOVERY_DETAIL_MSG " %X/%X", (uint32) (recptr >> 32), (uint32) recptr))); break; + case CAC_RESET: + ereport(FATAL, + (errcode(ERRCODE_CANNOT_CONNECT_NOW), + errmsg(POSTMASTER_IN_RESET_MSG))); + break; case CAC_TOOMANY: ereport(FATAL, (errcode(ERRCODE_TOO_MANY_CONNECTIONS), @@ -2961,8 +2979,14 @@ canAcceptConnections(int backend_type) else if (!FatalError && pmState == PM_RECOVERY) return CAC_NOTCONSISTENT; /* not yet at consistent recovery * state */ - else + else if (pmState == PM_STARTUP || pmState == PM_RECOVERY) return CAC_RECOVERY; /* else must be crash recovery */ + else + /* + * otherwise must be resetting: could be PM_WAIT_BACKENDS, + * PM_WAIT_DEAD_END or PM_NO_CHILDREN. 
+ */ + return CAC_RESET; } /* diff --git a/src/include/libpq/libpq-be.h b/src/include/libpq/libpq-be.h index 4dcbb2e0f8e..de4883f4f69 100644 --- a/src/include/libpq/libpq-be.h +++ b/src/include/libpq/libpq-be.h @@ -80,7 +80,8 @@ typedef enum CAC_state CAC_NOTCONSISTENT, CAC_TOOMANY, CAC_SUPERUSER, - CAC_MIRROR_READY + CAC_MIRROR_READY, + CAC_RESET } CAC_state; diff --git a/src/include/postmaster/ftsprobe.h b/src/include/postmaster/ftsprobe.h index ea81f0cded0..3eb76d95c08 100644 --- a/src/include/postmaster/ftsprobe.h +++ b/src/include/postmaster/ftsprobe.h @@ -57,6 +57,15 @@ typedef enum */ } FtsMessageState; +/* States indicating what status PM is in restarting. */ +typedef enum PMRestartState +{ + PM_NOT_IN_RESTART, /* PM is not restarting */ + PM_IN_RESETTING, /* PM is in resetting */ + PM_IN_RECOVERY_MAKING_PROGRESS, /* PM is in recovery and is making progress */ + PM_IN_RECOVERY_NOT_MAKING_PROGRESS /* PM is in recovery but not making progress*/ +} PMRestartState; + #define IsFtsMessageStateSuccess(state) (state == FTS_PROBE_SUCCESS || \ state == FTS_SYNCREP_OFF_SUCCESS || state == FTS_PROMOTE_SUCCESS) #define IsFtsMessageStateFailed(state) (state == FTS_PROBE_FAILED || \ @@ -87,7 +96,7 @@ typedef struct struct pg_conn *conn; /* libpq connection object */ int retry_count; XLogRecPtr xlogrecptr; - bool recovery_making_progress; + PMRestartState restart_state; } fts_segment_info; #ifdef USE_INTERNAL_FTS diff --git a/src/include/postmaster/postmaster.h b/src/include/postmaster/postmaster.h index f9b48b36f32..fdda2eca442 100644 --- a/src/include/postmaster/postmaster.h +++ b/src/include/postmaster/postmaster.h @@ -51,6 +51,7 @@ extern int postmaster_alive_fds[2]; #endif #define POSTMASTER_IN_STARTUP_MSG "the database system is starting up" +#define POSTMASTER_IN_RESET_MSG "the database system is resetting" #define POSTMASTER_IN_RECOVERY_MSG "the database system is in recovery mode" #define POSTMASTER_IN_RECOVERY_DETAIL_MSG "last replayed record at" /* gpstate must 
be updated if this message changes */ diff --git a/src/test/isolation2/expected/fts_segment_reset.out b/src/test/isolation2/expected/fts_segment_reset.out new file mode 100644 index 00000000000..8d575ccfddc --- /dev/null +++ b/src/test/isolation2/expected/fts_segment_reset.out @@ -0,0 +1,93 @@ +-- This test verifies that FTS shouldn't issue failover to mirror when +-- the primary is taking long in the RESET state. + +-- start_matchsubs +-- m/seg0 [0-9.]+:\d+/ +-- s/seg0 [0-9.]+:\d+/seg0 IP:PORT/ +-- end_matchsubs + +-- Let FTS detect/declare failure sooner +!\retcode gpconfig -c gp_fts_probe_interval -v 10 --masteronly; +(exited with code 0) +!\retcode gpstop -u; +(exited with code 0) + +-- Let the background writer sleep 27 seconds to delay the resetting. +-- This number is selected because there's a slight chance that FTS senses +-- "recovery not in progress" after its 5-second retry window and promote +-- the mirror. So just put the end of the sleep perid away from the end +-- of the retry windows. +select gp_inject_fault('fault_in_background_writer_quickdie', 'sleep', '', '', '', 1, 1, 27, dbid) from gp_segment_configuration where role = 'p' and content = 0; + gp_inject_fault +----------------- + Success: +(1 row) + +-- Do not let the postmaster send SIGKILL to the bgwriter +select gp_inject_fault_infinite('postmaster_server_loop_no_sigkill', 'skip', dbid) from gp_segment_configuration where role = 'p' and content = 0; + gp_inject_fault_infinite +-------------------------- + Success: +(1 row) + +-- Now bring down primary of seg0. There're a lot of ways to do that, in order +-- to better emulate a real-world scnarios we're injecting a PANIC to do that. 
+1:select gp_inject_fault('start_prepare', 'panic', dbid) from gp_segment_configuration where role = 'p' AND content = 0; + gp_inject_fault +----------------- + Success: +(1 row) +1&:create table fts_reset_t(a int); + +-- This should fail due to the seg0 in reset mode +2&:create table fts_reset_t2(a int); + +-- Try another one but let the gang creation retry for longer. +-- Default is 2000ms (gp_gang_creation_retry_timer) * 5 (gp_gang_creation_retry_count) = 10s. +-- Now make it 50s which is well longer than the delay we inserted before, so it can succeed. +3:set gp_gang_creation_retry_timer = 10000; +SET +3:create table fts_reset_t3(a int); +CREATE + +1<: <... completed> +ERROR: fault triggered, fault name:'start_prepare' fault type:'panic' +2<: <... completed> +DETAIL: Segments are in reset/recovery mode. +ERROR: failed to acquire resources on one or more segments + +-- We shouldn't see failover to mirror +select gp_request_fts_probe_scan(); + gp_request_fts_probe_scan +--------------------------- + t +(1 row) +select dbid, role, preferred_role, status from gp_segment_configuration where content = 0; + dbid | role | preferred_role | status +------+------+----------------+-------- + 2 | p | p | u + 5 | m | m | u +(2 rows) + +select gp_inject_fault('postmaster_server_loop_no_sigkill', 'reset', dbid) from gp_segment_configuration where role = 'p' and content = 0; + gp_inject_fault +----------------- + Success: +(1 row) +select gp_inject_fault('fault_in_background_writer_quickdie', 'reset', dbid) from gp_segment_configuration where role = 'p' and content = 0; + gp_inject_fault +----------------- + Success: +(1 row) + +-- The only table that should have been created successfully +drop table fts_reset_t3; +DROP + +-- In case anything goes wrong, we don't want to affect other tests. So rebalance the cluster anyway. 
+!\retcode gprecoverseg -aF !\retcode gprecoverseg -ar +-- restore parameters +!\retcode gpconfig -r gp_fts_probe_interval --masteronly; +(exited with code 127) +!\retcode gpstop -u; +(exited with code 0) diff --git a/src/test/isolation2/isolation2_schedule b/src/test/isolation2/isolation2_schedule index 72eab0700ed..f637816f826 100644 --- a/src/test/isolation2/isolation2_schedule +++ b/src/test/isolation2/isolation2_schedule @@ -245,6 +245,7 @@ test: segwalrep/commit_blocking_on_standby test: segwalrep/dtx_recovery_wait_lsn test: fts_manual_probe test: fts_session_reset +test: fts_segment_reset # Reindex tests test: reindex/abort_reindex diff --git a/src/test/isolation2/sql/fts_segment_reset.sql b/src/test/isolation2/sql/fts_segment_reset.sql new file mode 100644 index 00000000000..cde5a5a6322 --- /dev/null +++ b/src/test/isolation2/sql/fts_segment_reset.sql @@ -0,0 +1,59 @@ +-- This test verifies that FTS shouldn't issue failover to mirror when +-- the primary is taking long in the RESET state. + +-- start_matchsubs +-- m/seg0 [0-9.]+:\d+/ +-- s/seg0 [0-9.]+:\d+/seg0 IP:PORT/ +-- end_matchsubs + +-- Let FTS detect/declare failure sooner +!\retcode gpconfig -c gp_fts_probe_interval -v 10 --masteronly; +!\retcode gpstop -u; + +-- Let the background writer sleep 27 seconds to delay the resetting. +-- This number is selected because there's a slight chance that FTS senses +-- "recovery not in progress" after its 5-second retry window and promote +-- the mirror. So just put the end of the sleep perid away from the end +-- of the retry windows. +select gp_inject_fault('fault_in_background_writer_quickdie', 'sleep', '', '', '', 1, 1, 27, dbid) +from gp_segment_configuration where role = 'p' and content = 0; + +-- Do not let the postmaster send SIGKILL to the bgwriter +select gp_inject_fault_infinite('postmaster_server_loop_no_sigkill', 'skip', dbid) +from gp_segment_configuration where role = 'p' and content = 0; + +-- Now bring down primary of seg0. 
There're a lot of ways to do that, in order +-- to better emulate a real-world scnarios we're injecting a PANIC to do that. +1:select gp_inject_fault('start_prepare', 'panic', dbid) +from gp_segment_configuration where role = 'p' AND content = 0; +1&:create table fts_reset_t(a int); + +-- This should fail due to the seg0 in reset mode +2&:create table fts_reset_t2(a int); + +-- Try another one but let the gang creation retry for longer. +-- Default is 2000ms (gp_gang_creation_retry_timer) * 5 (gp_gang_creation_retry_count) = 10s. +-- Now make it 50s which is well longer than the delay we inserted before, so it can succeed. +3:set gp_gang_creation_retry_timer = 10000; +3:create table fts_reset_t3(a int); + +1<: +2<: + +-- We shouldn't see failover to mirror +select gp_request_fts_probe_scan(); +select dbid, role, preferred_role, status from gp_segment_configuration where content = 0; + +select gp_inject_fault('postmaster_server_loop_no_sigkill', 'reset', dbid) from gp_segment_configuration where role = 'p' and content = 0; +select gp_inject_fault('fault_in_background_writer_quickdie', 'reset', dbid) from gp_segment_configuration where role = 'p' and content = 0; + +-- The only table that should have been created successfully +drop table fts_reset_t3; + +-- In case anything goes wrong, we don't want to affect other tests. So rebalance the cluster anyway. 
+!\retcode gprecoverseg -aF +!\retcode gprecoverseg -ar + +-- restore parameters +!\retcode gpconfig -r gp_fts_probe_interval --masteronly; +!\retcode gpstop -u; diff --git a/src/test/isolation2/sql_isolation_testcase.py b/src/test/isolation2/sql_isolation_testcase.py index cd2fdfaf6e6..b7fa591b977 100644 --- a/src/test/isolation2/sql_isolation_testcase.py +++ b/src/test/isolation2/sql_isolation_testcase.py @@ -414,6 +414,7 @@ def connectdb(self, given_dbname, given_host = None, given_port = None, given_op self.create_exception = e break elif (("the database system is starting up" in str(e) or + "the database system is resetting" in str(e) or "the database system is in recovery mode" in str(e)) and retry > 1): retry -= 1 diff --git a/src/test/regress/output/dispatch.source b/src/test/regress/output/dispatch.source index d75f71e9e63..185c3257de0 100644 --- a/src/test/regress/output/dispatch.source +++ b/src/test/regress/output/dispatch.source @@ -345,7 +345,7 @@ select cleanupAllGangs(); select * from dispatch_test_t1, dispatch_test_t2, dispatch_test_t3 where dispatch_test_t1.c2 = dispatch_test_t2.c2 and dispatch_test_t2.c3 = dispatch_test_t3.c3; ERROR: failed to acquire resources on one or more segments -DETAIL: Segments are in recovery mode. +DETAIL: Segments are in reset/recovery mode. set gp_gang_creation_retry_count to 10; -- should success and process_startup_packet will be invalid after this query select * from dispatch_test_t1, dispatch_test_t2, dispatch_test_t3 From 385b29da07f313a988b8bb10abb00270bbcc9220 Mon Sep 17 00:00:00 2001 From: SmartKeyerror Date: Wed, 6 Apr 2022 13:58:28 +0800 Subject: [PATCH 08/46] recalculate QE's query_mem proportionally (#13160) In the current resource group implementation, query_mem in the plan tree is calculated using QD's system memory and the number of primary segments, not QE's own system memory and the number of primary segments. 
This can result in the wrong memory being allocated at the execution stage eventually, which can lead to various problems such as OOM, underutilization of QE resources, etc. The query_mem is linearly proportional to system memory and number of primary segments if we enable resource group, the approximate calculation formula is as follows: query_mem = (total_memory * gp_resource_group_memory_limit * memory_limit / nsegments) * memory_spill_ratio / concurrency Only total_memory and nsegments differ between QD and QE, so we can dispatch these two parameters to QE, and then calculate QE's own query_mem proportionally. At the same time, we use the GUC gp_resource_group_enable_recalculate_query_mem to let the client decide whether to recalculate the query_mem proportionally on QE and repopulate the operatorMemKB in the plan tree according to this value. --- src/backend/cdb/dispatcher/cdbdisp_query.c | 16 ++++ src/backend/executor/execMain.c | 75 +++++++++++++++++-- src/backend/executor/nodeSort.c | 10 +++ src/backend/nodes/copyfuncs.c | 3 + src/backend/nodes/outfuncs.c | 4 + src/backend/nodes/readfuncs.c | 4 + src/backend/utils/misc/guc_gp.c | 11 +++ src/backend/utils/resource_manager/memquota.c | 3 +- src/include/cdb/cdbvars.h | 3 + src/include/nodes/plannodes.h | 3 + src/include/utils/unsync_guc_name.h | 1 + .../expected/resgroup/resgroup_query_mem.out | 50 +++++++++++++ .../isolation2/isolation2_resgroup_schedule | 1 + .../sql/resgroup/resgroup_query_mem.sql | 63 ++++++++++++++++ 14 files changed, 239 insertions(+), 8 deletions(-) create mode 100644 src/test/isolation2/expected/resgroup/resgroup_query_mem.out create mode 100644 src/test/isolation2/sql/resgroup/resgroup_query_mem.sql diff --git a/src/backend/cdb/dispatcher/cdbdisp_query.c b/src/backend/cdb/dispatcher/cdbdisp_query.c index 9bbfd10dd2d..0af87d54884 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_query.c +++ b/src/backend/cdb/dispatcher/cdbdisp_query.c @@ -36,6 +36,7 @@ #include "utils/faultinjector.h" #include
"utils/resgroup.h" #include "utils/resource_manager.h" +#include "utils/resgroup-ops.h" #include "utils/session_state.h" #include "utils/typcache.h" #include "miscadmin.h" @@ -265,6 +266,21 @@ CdbDispatchPlan(struct QueryDesc *queryDesc, verify_shared_snapshot_ready(gp_command_count); } + /* In the final stage, add the resource information needed for QE by the resource group */ + stmt->total_memory_coordinator = 0; + stmt->nsegments_coordinator = 0; + + if (IsResGroupEnabled() && gp_resource_group_enable_recalculate_query_mem && + memory_spill_ratio != RESGROUP_FALLBACK_MEMORY_SPILL_RATIO) + { + /* + * We enable resource group re-calculate the query_mem on QE, and we are not in + * fall back mode (use statement_mem). + */ + stmt->total_memory_coordinator = ResGroupOps_GetTotalMemory(); + stmt->nsegments_coordinator = ResGroupGetSegmentNum(); + } + cdbdisp_dispatchX(queryDesc, planRequiresTxn, cancelOnError); } diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index e0f3dbfc71e..14b167a6f4d 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -80,6 +80,7 @@ #include "utils/workfile_mgr.h" #include "utils/faultinjector.h" #include "utils/resource_manager.h" +#include "utils/resgroup-ops.h" #include "catalog/pg_statistic.h" #include "catalog/pg_class.h" @@ -224,9 +225,6 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags) if (query_info_collect_hook) (*query_info_collect_hook)(METRICS_QUERY_START, queryDesc); - /** - * Distribute memory to operators. - */ if (Gp_role == GP_ROLE_DISPATCH) { if (!IsResManagerMemoryPolicyNone() && @@ -235,12 +233,75 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags) elog(GP_RESMANAGER_MEMORY_LOG_LEVEL, "query requested %.0fKB of memory", (double) queryDesc->plannedstmt->query_mem / 1024.0); } + } - /** - * There are some statements that do not go through the resource queue, so we cannot - * put in a strong assert here. Someday, we should fix resource queues. 
+ /** + * Distribute memory to operators. + * + * There are some statements that do not go through the resource queue, so we cannot + * put in a strong assert here. Someday, we should fix resource queues. + */ + if (queryDesc->plannedstmt->query_mem > 0) + { + /* + * Whether we should skip operator memory assignment + * - We should never skip operator memory assignment on QD. + * - On QE, not skip in case of resource group enabled, and customer allow QE re-calculate query_mem, + * as the GUC `gp_resource_group_enable_recalculate_query_mem` set to on. */ - if (queryDesc->plannedstmt->query_mem > 0) + bool should_skip_operator_memory_assign = true; + + if (Gp_role == GP_ROLE_EXECUTE) + { + /* + * If resource group is enabled, we should re-calculate query_mem on QE, because the memory + * of the coordinator and segment nodes or the number of instance could be different. + * + * On QE, we only try to recalculate query_mem if resource group enabled. Otherwise, we will skip this + * and the next operator memory assignment if resource queue enabled + */ + if (IsResGroupEnabled()) + { + int32 total_memory_coordinator = queryDesc->plannedstmt->total_memory_coordinator; + int nsegments_coordinator = queryDesc->plannedstmt->nsegments_coordinator; + + /* + * memSpill is not in fallback mode, and we enable resource group re-calculate the query_mem on QE, + * then re-calculate the query_mem and re-compute operatorMemKB using this new value + */ + if (total_memory_coordinator != 0 && nsegments_coordinator != 0) + { + should_skip_operator_memory_assign = false; + + /* Get total system memory on the QE in MB */ + int32 total_memory_segment = ResGroupOps_GetTotalMemory(); + int nsegments_segment = ResGroupGetSegmentNum(); + uint64 coordinator_query_mem = queryDesc->plannedstmt->query_mem; + + /* + * In the resource group environment, when we calculate query_mem, we can roughly use the following + * formula: + * + * query_mem = (total_memory * gp_resource_group_memory_limit * 
memory_limit / nsegments) * memory_spill_ratio / concurrency + * + * Only total_memory and nsegments could differ between QD and QE, so query_mem is proportional to + * the system's available virtual memory and inversely proportional to the number of instances. + */ + queryDesc->plannedstmt->query_mem *= (total_memory_segment * 1.0 / nsegments_segment) / + (total_memory_coordinator * 1.0 / nsegments_coordinator); + + elog(DEBUG1, "re-calculate query_mem, original QD's query_mem: %.0fKB, after recalculation QE's query_mem: %.0fKB", + (double) coordinator_query_mem / 1024.0 , (double) queryDesc->plannedstmt->query_mem / 1024.0); + } + } + } + else + { + /* On QD, we always traverse the plan tree and compute operatorMemKB */ + should_skip_operator_memory_assign = false; + } + + if (!should_skip_operator_memory_assign) { switch(*gp_resmanager_memory_policy) { diff --git a/src/backend/executor/nodeSort.c b/src/backend/executor/nodeSort.c index 045e4be4ef4..812154d3b38 100644 --- a/src/backend/executor/nodeSort.c +++ b/src/backend/executor/nodeSort.c @@ -209,6 +209,16 @@ ExecInitSort(Sort *node, EState *estate, int eflags) SO1_printf("ExecInitSort: %s\n", "initializing sort node"); + /* + * GPDB + */ +#ifdef FAULT_INJECTOR + if (SIMPLE_FAULT_INJECTOR("rg_qmem_qd_qe") == FaultInjectorTypeSkip) + { + elog(NOTICE, "op_mem=%d", (int) (((Plan *) node)->operatorMemKB)); + } +#endif + /* * create state structure */ diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 7fe3e7d0836..2ab5f2b6d85 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -148,6 +148,9 @@ _copyPlannedStmt(const PlannedStmt *from) COPY_SCALAR_FIELD(query_mem); + COPY_SCALAR_FIELD(total_memory_coordinator); + COPY_SCALAR_FIELD(nsegments_coordinator); + COPY_NODE_FIELD(intoClause); COPY_NODE_FIELD(copyIntoClause); COPY_NODE_FIELD(refreshClause); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 4d0dee54f2c..2aabc00c5bd 
100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -380,6 +380,10 @@ _outPlannedStmt(StringInfo str, const PlannedStmt *node) WRITE_NODE_FIELD(intoPolicy); WRITE_UINT64_FIELD(query_mem); + + WRITE_UINT_FIELD(total_memory_coordinator); + WRITE_INT_FIELD(nsegments_coordinator); + WRITE_NODE_FIELD(intoClause); WRITE_NODE_FIELD(copyIntoClause); WRITE_NODE_FIELD(refreshClause); diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index d82095aa802..96433fa186d 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1658,6 +1658,10 @@ _readPlannedStmt(void) READ_NODE_FIELD(intoPolicy); READ_UINT64_FIELD(query_mem); + + READ_UINT_FIELD(total_memory_coordinator); + READ_INT_FIELD(nsegments_coordinator); + READ_NODE_FIELD(intoClause); READ_NODE_FIELD(copyIntoClause); READ_NODE_FIELD(refreshClause); diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index 2393346516e..b2c25f00360 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -226,6 +226,7 @@ double gp_resource_group_cpu_limit; double gp_resource_group_memory_limit; bool gp_resource_group_bypass; bool gp_resource_group_cpu_ceiling_enforcement; +bool gp_resource_group_enable_recalculate_query_mem; /* Metrics collector debug GUC */ bool vmem_process_interrupt = false; @@ -2837,6 +2838,16 @@ struct config_bool ConfigureNamesBool_gp[] = false, NULL, NULL }, + { + {"gp_resource_group_enable_recalculate_query_mem", PGC_USERSET, RESOURCES, + gettext_noop("Enable resource group re-calculate the query_mem on QE"), + NULL + }, + &gp_resource_group_enable_recalculate_query_mem, + true, + NULL, NULL, NULL + }, + { {"stats_queue_level", PGC_SUSET, STATS_COLLECTOR, gettext_noop("Collects resource queue-level statistics on database activity."), diff --git a/src/backend/utils/resource_manager/memquota.c b/src/backend/utils/resource_manager/memquota.c index 648b9c1f09d..bc0b9c9869f 100644 --- 
a/src/backend/utils/resource_manager/memquota.c +++ b/src/backend/utils/resource_manager/memquota.c @@ -960,8 +960,9 @@ PolicyEagerFreeAssignOperatorMemoryKB(PlannedStmt *stmt, uint64 memAvailableByte int64 ResourceManagerGetQueryMemoryLimit(PlannedStmt* stmt) { + /* Returns QD's query_mem if we are on the QE, for re-calculating QE's query_mem */ if (Gp_role != GP_ROLE_DISPATCH && !IS_SINGLENODE()) - return 0; + return stmt->query_mem; /* no limits in single user mode. */ if (!IsUnderPostmaster) diff --git a/src/include/cdb/cdbvars.h b/src/include/cdb/cdbvars.h index 4a3ef8e5abf..b2f3e52c05e 100644 --- a/src/include/cdb/cdbvars.h +++ b/src/include/cdb/cdbvars.h @@ -682,6 +682,9 @@ extern int gp_workfile_bytes_to_checksum; extern bool coredump_on_memerror; +/* Greenplum resource group query_mem re-calculate on QE */ +extern bool gp_resource_group_enable_recalculate_query_mem; + /* * Autostats feature, whether or not to to automatically run ANALYZE after * insert/delete/update/ctas or after ctas/copy/insert in case the target diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index fbac2ca02b9..b679dbea1e6 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -141,6 +141,9 @@ typedef struct PlannedStmt /* What is the memory reserved for this query's execution? */ uint64 query_mem; + int32 total_memory_coordinator; /* GPDB: The total usable virtual memory on coordinator node in MB */ + int nsegments_coordinator; /* GPDB: The number of primary segments on coordinator node */ + /* * GPDB: Used to keep target information for CTAS and it is needed * to be dispatched to QEs. 
diff --git a/src/include/utils/unsync_guc_name.h b/src/include/utils/unsync_guc_name.h index 9c3c84049d5..82c23f64403 100644 --- a/src/include/utils/unsync_guc_name.h +++ b/src/include/utils/unsync_guc_name.h @@ -270,6 +270,7 @@ "gp_resource_group_cpu_ceiling_enforcement", "gp_resource_group_memory_limit", "gp_resource_group_queuing_timeout", + "gp_resource_group_enable_recalculate_query_mem", "gp_resource_manager", "gp_resqueue_memory_policy", "gp_resqueue_priority", diff --git a/src/test/isolation2/expected/resgroup/resgroup_query_mem.out b/src/test/isolation2/expected/resgroup/resgroup_query_mem.out new file mode 100644 index 00000000000..d05f2bb968c --- /dev/null +++ b/src/test/isolation2/expected/resgroup/resgroup_query_mem.out @@ -0,0 +1,50 @@ +-- This test is to verify that query_mem is set correctly in QEs. +-- Previously, resgroup does not consider that different number of +-- segments among coordinator and segments. Now we let QEs to re-calculate +-- query_mem in each segment locally. This test case use the following +-- steps to verify the new method's correctness: +-- 1. fetch available memory in coordinator and a single segment, +-- compute the ratio +-- 2. use fault inject and plpython invokes pygresql with notice, +-- get a distributed plan's sort's operator memory in a QE +-- 3. Get sort's operator memory in a pure QD's plan (catalog order by) +-- 4. compute the ratio of two operator memorys +-- 5. these two ratios should be the same. 
+ +create extension if not exists gp_inject_fault; +CREATE +create or replace language plpython3u; +CREATE + +create table t_qmem(a int); +CREATE +select gp_inject_fault('rg_qmem_qd_qe', 'skip', dbid) from gp_segment_configuration where role = 'p' and content = 0; + gp_inject_fault +----------------- + Success: +(1 row) + +create function rg_qmem_test() returns boolean as $$ from pg import DB from copy import deepcopy import re +# 1: get resgroup available mem in QD and QE and compute ratio sql = ("select memory_available m from " "gp_toolkit.gp_resgroup_status_per_segment " "where segment_id = %d and rsgname = 'admin_group'") qd_mem = int(plpy.execute(sql % -1)[0]["m"]) qe_mem = int(plpy.execute(sql % 0)[0]["m"]) ratio1 = int(round(float(qd_mem) / qe_mem)) +# 2. use notice to get qe operator mem dbname = plpy.execute("select current_database() db")[0]["db"] db = DB(dbname=dbname) qe_opmem_info = [] db.set_notice_receiver(lambda n: qe_opmem_info.append(deepcopy(n.message))) sql = "select * from t_qmem order by 1" db.query(sql) qe_opmem = int(re.findall(r"op_mem=(\d+)", qe_opmem_info[0])[0]) db.set_notice_receiver(None) +# 3. 
get qd operator mem sql = "explain analyze select * from pg_class order by relpages limit 10" db.query("set gp_resgroup_print_operator_memory_limits = on;") r = db.query(sql).getresult() for (line, ) in r: if "-> Sort" not in line: continue qd_opmem = int(re.findall(r"operatorMem: (\d+)", line)[0]) break +db.close() +ratio2 = int(round(float(qd_opmem) / qe_opmem)) +return ratio1 == ratio2 +$$ language plpython3u; +CREATE + +select rg_qmem_test(); + rg_qmem_test +-------------- + t +(1 row) +select gp_inject_fault('rg_qmem_qd_qe', 'reset', dbid) from gp_segment_configuration where role = 'p' and content = 0; + gp_inject_fault +----------------- + Success: +(1 row) +drop function rg_qmem_test(); +DROP +drop table t_qmem; +DROP diff --git a/src/test/isolation2/isolation2_resgroup_schedule b/src/test/isolation2/isolation2_resgroup_schedule index c6aad426821..5a346d23965 100644 --- a/src/test/isolation2/isolation2_resgroup_schedule +++ b/src/test/isolation2/isolation2_resgroup_schedule @@ -11,6 +11,7 @@ test: resgroup/resgroup_name_convention test: resgroup/resgroup_assign_slot_fail test: resgroup/resgroup_unassign_entrydb test: resgroup/resgroup_seg_down_2pc +test: resgroup/resgroup_query_mem # functions test: resgroup/resgroup_concurrency diff --git a/src/test/isolation2/sql/resgroup/resgroup_query_mem.sql b/src/test/isolation2/sql/resgroup/resgroup_query_mem.sql new file mode 100644 index 00000000000..b047fb99304 --- /dev/null +++ b/src/test/isolation2/sql/resgroup/resgroup_query_mem.sql @@ -0,0 +1,63 @@ +-- This test is to verify that query_mem is set correctly in QEs. +-- Previously, resgroup does not consider that different number of +-- segments among coordinator and segments. Now we let QEs to re-calculate +-- query_mem in each segment locally. This test case use the following +-- steps to verify the new method's correctness: +-- 1. fetch available memory in coordinator and a single segment, +-- compute the ratio +-- 2. 
use fault inject and plpython invokes pygresql with notice, +-- get a distributed plan's sort's operator memory in a QE +-- 3. Get sort's operator memory in a pure QD's plan (catalog order by) +-- 4. compute the ratio of two operator memorys +-- 5. these two ratios should be the same. + +create extension if not exists gp_inject_fault; +create or replace language plpython3u; + +create table t_qmem(a int); +select gp_inject_fault('rg_qmem_qd_qe', 'skip', dbid) from gp_segment_configuration where role = 'p' and content = 0; + +create function rg_qmem_test() returns boolean as $$ +from pg import DB +from copy import deepcopy +import re + +# 1: get resgroup available mem in QD and QE and compute ratio +sql = ("select memory_available m from " + "gp_toolkit.gp_resgroup_status_per_segment " + "where segment_id = %d and rsgname = 'admin_group'") +qd_mem = int(plpy.execute(sql % -1)[0]["m"]) +qe_mem = int(plpy.execute(sql % 0)[0]["m"]) +ratio1 = int(round(float(qd_mem) / qe_mem)) + +# 2. use notice to get qe operator mem +dbname = plpy.execute("select current_database() db")[0]["db"] +db = DB(dbname=dbname) +qe_opmem_info = [] +db.set_notice_receiver(lambda n: qe_opmem_info.append(deepcopy(n.message))) +sql = "select * from t_qmem order by 1" +db.query(sql) +qe_opmem = int(re.findall(r"op_mem=(\d+)", qe_opmem_info[0])[0]) +db.set_notice_receiver(None) + +# 3. 
get qd operator mem +sql = "explain analyze select * from pg_class order by relpages limit 10" +db.query("set gp_resgroup_print_operator_memory_limits = on;") +r = db.query(sql).getresult() +for (line, ) in r: + if "-> Sort" not in line: continue + qd_opmem = int(re.findall(r"operatorMem: (\d+)", line)[0]) + break + +db.close() + +ratio2 = int(round(float(qd_opmem) / qe_opmem)) + +return ratio1 == ratio2 + +$$ language plpython3u; + +select rg_qmem_test(); +select gp_inject_fault('rg_qmem_qd_qe', 'reset', dbid) from gp_segment_configuration where role = 'p' and content = 0; +drop function rg_qmem_test(); +drop table t_qmem; From fd0075fd37bcb4eee0bea0e1f11d4b2d55fe7ae8 Mon Sep 17 00:00:00 2001 From: Jacob Champion Date: Wed, 21 Nov 2018 16:16:10 -0800 Subject: [PATCH 09/46] CdbComponentDatabaseInfo: add an active segdb list ...to help with debugging and introspection. This will allow us to pull information about the active segments during execution, and it will form the basis of the gp_backend_info() function. 
--- src/backend/cdb/cdbutil.c | 4 ++++ src/include/postmaster/fts_comm.h | 1 + 2 files changed, 5 insertions(+) diff --git a/src/backend/cdb/cdbutil.c b/src/backend/cdb/cdbutil.c index 264bdc8c284..36f505105d3 100644 --- a/src/backend/cdb/cdbutil.c +++ b/src/backend/cdb/cdbutil.c @@ -430,6 +430,7 @@ getCdbComponentInfo(void) pRow->cdbs = component_databases; pRow->config = config; pRow->freelist = NIL; + pRow->activelist = NIL; pRow->numIdleQEs = 0; pRow->numActiveQEs = 0; @@ -816,6 +817,7 @@ cdbcomponent_allocateIdleQE(int contentId, SegmentType segmentType) cdbconn_setQEIdentifier(segdbDesc, -1); + cdbinfo->activelist = lcons(segdbDesc, cdbinfo->activelist); INCR_COUNT(cdbinfo, numActiveQEs); MemoryContextSwitchTo(oldContext); @@ -881,6 +883,8 @@ cdbcomponent_recycleIdleQE(SegmentDatabaseDescriptor *segdbDesc, bool forceDestr isWriter = segdbDesc->isWriter; /* update num of active QEs */ + Assert(list_member_ptr(cdbinfo->activelist, segdbDesc)); + cdbinfo->activelist = list_delete_ptr(cdbinfo->activelist, segdbDesc); DECR_COUNT(cdbinfo, numActiveQEs); oldContext = MemoryContextSwitchTo(CdbComponentsContext); diff --git a/src/include/postmaster/fts_comm.h b/src/include/postmaster/fts_comm.h index 16fc53247c2..abd2c721e4c 100644 --- a/src/include/postmaster/fts_comm.h +++ b/src/include/postmaster/fts_comm.h @@ -164,6 +164,7 @@ struct CdbComponentDatabaseInfo int16 hostSegs; /* number of primary segments on the same hosts */ List *freelist; /* list of idle segment dbs */ int numIdleQEs; + List *activelist; /* list of active segment dbs */ int numActiveQEs; }; From 43eb8bcd52fd5a7e0a4b31bba217a3cfd78959a0 Mon Sep 17 00:00:00 2001 From: Divyesh Vanjare Date: Wed, 13 Apr 2022 14:32:12 -0700 Subject: [PATCH 10/46] Add gp_backend_info() for runtime introspection/debugging To debug into the master backend for a given Postgres session, you can SELECT pg_backend_pid() and attach a debugger to the resulting process ID. 
We currently have no corresponding function for the segment backends, however -- developers have to read the output of `ps` and try to correlate their connected session to the correct backends. This is error-prone, especially if there are many sessions in flight. gp_backend_info() is an attempt to fill this gap. Running SELECT * FROM gp_backend_info(); will return a table of the following format: id | type | content | host | port | pid ----+------+---------+-----------+-------+------- -1 | Q | -1 | pchampion | 25431 | 50430 0 | w | 0 | pchampion | 25432 | 50431 1 | w | 1 | pchampion | 25433 | 50432 2 | w | 2 | pchampion | 25434 | 50433 This allows developers to jump directly to the correct host and PID for a given backend. This patch supports backends for writer gangs (type 'w' in the table), reader gangs ('r'), master QD backend ('Q') and master singleton readers ('R'). Co-authored-by: Soumyadeep Chakraborty Co-authored-by: Divyesh Vanjare --- src/backend/cdb/dispatcher/cdbgang.c | 187 +++++++++++++++ src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_proc.dat | 5 +- src/include/cdb/cdbgang.h | 2 + src/test/regress/expected/gp_backend_info.out | 227 ++++++++++++++++++ src/test/regress/expected/motion_socket.out | 36 ++- src/test/regress/greenplum_schedule | 2 +- src/test/regress/sql/gp_backend_info.sql | 84 +++++++ src/test/regress/sql/motion_socket.sql | 40 ++- 9 files changed, 540 insertions(+), 45 deletions(-) create mode 100644 src/test/regress/expected/gp_backend_info.out create mode 100644 src/test/regress/sql/gp_backend_info.sql diff --git a/src/backend/cdb/dispatcher/cdbgang.c b/src/backend/cdb/dispatcher/cdbgang.c index c6c4cd64312..4b80b3f3fe2 100644 --- a/src/backend/cdb/dispatcher/cdbgang.c +++ b/src/backend/cdb/dispatcher/cdbgang.c @@ -46,6 +46,9 @@ #include "utils/guc_tables.h" +#include "funcapi.h" +#include "utils/builtins.h" + /* * All QEs are managed by cdb_component_dbs in QD, QD assigned * a unique identifier for each QE, when a QE 
is created, this @@ -943,3 +946,187 @@ ResetAllGangs(void) DisconnectAndDestroyAllGangs(true); GpDropTempTables(); } + +/* + * Used by gp_backend_info() to find a single character that represents a + * backend type. + */ +static char +backend_type(SegmentDatabaseDescriptor *segdb) +{ + if (segdb->identifier == -1) + { + /* QD backend */ + return 'Q'; + } + if (segdb->segindex == -1) + { + /* Entry singleton reader. */ + return 'R'; + } + + return (segdb->isWriter ? 'w' : 'r'); +} + +/* + * qsort comparator for SegmentDatabaseDescriptors. Sorts by descriptor ID. + */ +static int +compare_segdb_id(const void *v1, const void *v2) +{ + SegmentDatabaseDescriptor *d1 = (SegmentDatabaseDescriptor *) lfirst(*(ListCell **) v1); + SegmentDatabaseDescriptor *d2 = (SegmentDatabaseDescriptor *) lfirst(*(ListCell **) v2); + + return d1->identifier - d2->identifier; +} + +/* + * Returns a list of rows, each corresponding to a connected segment backend and + * containing information on the role and definition of that backend (e.g. host, + * port, PID). + * + * SELECT * from gp_backend_info(); + */ +Datum +gp_backend_info(PG_FUNCTION_ARGS) +{ + if (Gp_role != GP_ROLE_DISPATCH) + ereport(ERROR, (errcode(ERRCODE_GP_COMMAND_ERROR), + errmsg("gp_backend_info() could only be called on QD"))); + + /* Our struct for funcctx->user_fctx. */ + struct func_ctx + { + List *segdbs; /* the SegmentDatabaseDescriptor entries we will output */ + ListCell *curpos; /* pointer to our current position in .segdbs */ + }; + + FuncCallContext *funcctx; + struct func_ctx *user_fctx; + + /* Number of attributes we'll return per row. Must match the catalog. */ +#define BACKENDINFO_NATTR 6 + + if (SRF_IS_FIRSTCALL()) + { + /* Standard first-call setup. 
*/ + MemoryContext oldcontext; + TupleDesc tupdesc; + CdbComponentDatabases *cdbs; + int i; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + funcctx->user_fctx = user_fctx = palloc0(sizeof(*user_fctx)); + + /* Construct the list of all known segment DB descriptors. */ + cdbs = cdbcomponent_getCdbComponents(); + + for (i = 0; i < cdbs->total_entry_dbs; ++i) + { + CdbComponentDatabaseInfo *cdbinfo = &cdbs->entry_db_info[i]; + + user_fctx->segdbs = + list_concat_unique_ptr(user_fctx->segdbs, cdbinfo->activelist); + user_fctx->segdbs = + list_concat_unique_ptr(user_fctx->segdbs, cdbinfo->freelist); + } + + for (i = 0; i < cdbs->total_segment_dbs; ++i) + { + CdbComponentDatabaseInfo *cdbinfo = &cdbs->segment_db_info[i]; + + user_fctx->segdbs = + list_concat_unique_ptr(user_fctx->segdbs, cdbinfo->activelist); + user_fctx->segdbs = + list_concat_unique_ptr(user_fctx->segdbs, cdbinfo->freelist); + } + /* Fake a segment descriptor to represent the current QD backend */ + SegmentDatabaseDescriptor *qddesc = palloc0(sizeof(SegmentDatabaseDescriptor)); + qddesc->segment_database_info = cdbcomponent_getComponentInfo(MASTER_CONTENT_ID); + qddesc->segindex = -1; + qddesc->conn = NULL; + qddesc->motionListener = 0; + qddesc->backendPid = MyProcPid; + qddesc->whoami = NULL; + qddesc->isWriter = false; + qddesc->identifier = -1; + + user_fctx->segdbs = lcons(qddesc, user_fctx->segdbs); + /* + * For a slightly better default user experience, sort by descriptor ID. + * Users may of course specify their own ORDER BY if they don't like it. + */ + user_fctx->segdbs = list_qsort(user_fctx->segdbs, compare_segdb_id); + user_fctx->curpos = list_head(user_fctx->segdbs); + + /* Create a descriptor for the records we'll be returning. 
*/ + tupdesc = CreateTemplateTupleDesc(BACKENDINFO_NATTR); + TupleDescInitEntry(tupdesc, 1, "id", INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, 2, "type", CHAROID, -1, 0); + TupleDescInitEntry(tupdesc, 3, "content", INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, 4, "host", TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, 5, "port", INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, 6, "pid", INT4OID, -1, 0); + + funcctx->tuple_desc = BlessTupleDesc(tupdesc); + + /* Tell the caller how many rows we'll return. */ + funcctx->max_calls = list_length(user_fctx->segdbs); + + MemoryContextSwitchTo(oldcontext); + } + + funcctx = SRF_PERCALL_SETUP(); + + /* Construct and return a row for every entry. */ + if (funcctx->call_cntr < funcctx->max_calls) + { + Datum values[BACKENDINFO_NATTR] = {0}; + bool nulls[BACKENDINFO_NATTR] = {0}; + HeapTuple tuple; + SegmentDatabaseDescriptor *dbdesc; + CdbComponentDatabaseInfo *dbinfo; + + user_fctx = funcctx->user_fctx; + + /* Get the next descriptor. */ + dbdesc = lfirst(user_fctx->curpos); + user_fctx->curpos = lnext(user_fctx->curpos); + + /* Fill in the row attributes. */ + dbinfo = dbdesc->segment_database_info; + + values[0] = Int32GetDatum(dbdesc->identifier); /* id */ + values[1] = CharGetDatum(backend_type(dbdesc)); /* type */ + values[2] = Int32GetDatum(dbdesc->segindex); /* content */ + + if (dbinfo->config->hostname) /* host */ + values[3] = CStringGetTextDatum(dbinfo->config->hostname); + else + nulls[3] = true; + + values[4] = Int32GetDatum(dbinfo->config->port); /* port */ + values[5] = Int32GetDatum(dbdesc->backendPid); /* pid */ + + /* Form the new tuple using our attributes and return it. */ + tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); + + SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple)); + } + else + { + /* Clean up. 
*/ + user_fctx = funcctx->user_fctx; + if (user_fctx) + { + list_free(user_fctx->segdbs); + pfree(user_fctx); + + funcctx->user_fctx = NULL; + } + + SRF_RETURN_DONE(funcctx); + } +#undef BACKENDINFO_NATTR +} diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index da23a4c701f..e0b273a178f 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -56,6 +56,6 @@ */ /* 3yyymmddN */ -#define CATALOG_VERSION_NO 302402231 +#define CATALOG_VERSION_NO 302204081 #endif diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 538b209c31f..37f35ca8018 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -11971,6 +11971,10 @@ { oid => 7182, descr => 'wait until all endpoint of this parallel retrieve cursor has been retrieved finished', proname => 'gp_wait_parallel_retrieve_cursor', provolatile => 'v', proparallel => 'u', prorettype => 'bool', proargtypes => 'text int4', proallargtypes => '{text,int4,bool}', proargmodes => '{i,i,o}', proargnames => '{cursorname,timeout_sec,finished}', prosrc => 'gp_wait_parallel_retrieve_cursor', proexeclocation => 'c' }, +{ oid => 7183, descr => 'debugging information for segment backends', + proname => 'gp_backend_info', prorettype => 'record', prorows => '1', proretset => 't', proargtypes => '', proallargtypes => '{int4,char,int4,text,int4,int4}', prosrc => 'gp_backend_info', pronargs => 6, + proargnames => '{id,type,content,host,port,pid}', proargmodes => '{o,o,o,o,o,o}', proexeclocation => 'c'} + { oid => 7050, descr => 'bitmap(internal)', proname => 'bmhandler', provolatile => 'v', prorettype => 'index_am_handler', proargtypes => 'internal', prosrc => 'bmhandler' }, @@ -12438,7 +12442,6 @@ { oid => 7145, descr => 'Legacy cdbhash function', proname => 'cdblegacyhash_anyenum', prorettype => 'int4', proargtypes => 'anyenum', prosrc => 'cdblegacyhash_anyenum' }, - { oid => 6998, descr => 'Create a named restore point on all 
segments', proname => 'gp_create_restore_point', prorows => '1000', proretset => 't', proparallel => 'u', provolatile => 'v', prorettype => 'record', proargtypes => 'text', prosrc => 'gp_create_restore_point' }, diff --git a/src/include/cdb/cdbgang.h b/src/include/cdb/cdbgang.h index cec28c3f718..552263a7cf1 100644 --- a/src/include/cdb/cdbgang.h +++ b/src/include/cdb/cdbgang.h @@ -129,4 +129,6 @@ typedef struct CdbProcess typedef Gang *(*CreateGangFunc)(List *segments, SegmentType segmentType); +extern Datum gp_backend_info(PG_FUNCTION_ARGS); + #endif /* _CDBGANG_H_ */ diff --git a/src/test/regress/expected/gp_backend_info.out b/src/test/regress/expected/gp_backend_info.out new file mode 100644 index 00000000000..6bc00e581b7 --- /dev/null +++ b/src/test/regress/expected/gp_backend_info.out @@ -0,0 +1,227 @@ +-- Tests for the gp_backend_info() function. +-- At first there should be no segment backends; we haven't performed any +-- queries yet. There should only be a QD backend +SELECT COUNT(*) = 1 FROM gp_backend_info(); + ?column? +---------- + t +(1 row) + +SELECT type, content FROM gp_backend_info(); + type | content +------+--------- + Q | -1 +(1 row) + +-- +-- Spin up the writer gang. +-- +CREATE TEMPORARY TABLE temp(); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause, and no column type is suitable for a distribution key. Creating a NULL policy entry. +--start_ignore +-- Help debugging by printing the results. Since most contents will be different +-- on every machine, we do the actual verification below. 
+SELECT * from gp_backend_info(); + id | type | content | host | port | pid +----+------+---------+-------------------------+------+------- + -1 | Q | -1 | vanjared-a01.vmware.com | 7000 | 89615 + 0 | w | 0 | vanjared-a01.vmware.com | 7002 | 89619 + 1 | w | 1 | vanjared-a01.vmware.com | 7003 | 89620 + 2 | w | 2 | vanjared-a01.vmware.com | 7004 | 89621 +(4 rows) + +--end_ignore +-- Now we should have as many backends as primaries +1 QD, and all primaries +-- backend should be marked as writers +SELECT COUNT(*) AS num_primaries FROM gp_segment_configuration + WHERE content >= 0 AND role = 'p' +\gset +SELECT COUNT(*) = :num_primaries +1 FROM gp_backend_info(); + ?column? +---------- + t +(1 row) + +SELECT COUNT(*) = :num_primaries FROM gp_backend_info() WHERE type = 'w'; + ?column? +---------- + t +(1 row) + +SELECT COUNT(*) = 1 FROM gp_backend_info() WHERE type = 'Q'; + ?column? +---------- + t +(1 row) + +-- All IDs and PIDs should be distinct. +SELECT COUNT(DISTINCT id) = :num_primaries +1 FROM gp_backend_info(); + ?column? +---------- + t +(1 row) + +SELECT COUNT(DISTINCT content) = :num_primaries +1 FROM gp_backend_info(); + ?column? +---------- + t +(1 row) + +SELECT COUNT(DISTINCT pid) = :num_primaries +1 FROM gp_backend_info(); + ?column? +---------- + t +(1 row) + +-- +-- Spin up a parallel reader gang. +-- +CREATE TEMPORARY TABLE temp2(); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause, and no column type is suitable for a distribution key. Creating a NULL policy entry. +SELECT * FROM temp JOIN (SELECT * FROM temp2) temp2 ON (temp = temp2); +-- +(0 rows) + +--start_ignore +-- Debugging helper (see above). 
+SELECT * from gp_backend_info(); + id | type | content | host | port | pid +----+------+---------+-------------------------+------+------- + -1 | Q | -1 | vanjared-a01.vmware.com | 7000 | 89615 + 0 | w | 0 | vanjared-a01.vmware.com | 7002 | 89619 + 1 | w | 1 | vanjared-a01.vmware.com | 7003 | 89620 + 2 | w | 2 | vanjared-a01.vmware.com | 7004 | 89621 + 3 | r | 0 | vanjared-a01.vmware.com | 7002 | 89625 + 4 | r | 1 | vanjared-a01.vmware.com | 7003 | 89626 + 5 | r | 2 | vanjared-a01.vmware.com | 7004 | 89627 +(7 rows) + +--end_ignore +-- Now we should have double the number of backends; the new ones should be +-- readers. +SELECT COUNT(*) = (:num_primaries * 2) +1 FROM gp_backend_info(); + ?column? +---------- + t +(1 row) + +SELECT COUNT(*) = :num_primaries FROM gp_backend_info() WHERE type = 'w'; + ?column? +---------- + t +(1 row) + +SELECT COUNT(*) = :num_primaries FROM gp_backend_info() WHERE type = 'r'; + ?column? +---------- + t +(1 row) + +SELECT COUNT(*) = 1 FROM gp_backend_info() WHERE type = 'Q'; + ?column? +---------- + t +(1 row) + +-- IDs and PIDs should still be distinct. +SELECT COUNT(DISTINCT id) = (:num_primaries * 2) +1 FROM gp_backend_info(); + ?column? +---------- + t +(1 row) + +SELECT COUNT(DISTINCT pid) = (:num_primaries * 2) +1 FROM gp_backend_info(); + ?column? +---------- + t +(1 row) + +-- Content IDs should be there twice (a reader and a writer for each segment). +SELECT COUNT(DISTINCT content) = :num_primaries +1 FROM gp_backend_info(); + ?column? +---------- + t +(1 row) + +SELECT COUNT(DISTINCT content) = :num_primaries FROM gp_backend_info() +WHERE content >= 0; + ?column? +---------- + t +(1 row) + +SELECT DISTINCT COUNT(content) FROM gp_backend_info() WHERE content >= 0 +GROUP BY content; + count +------- + 2 +(1 row) + +-- +-- Start up a singleton reader. +-- +SELECT * FROM temp JOIN (SELECT oid FROM pg_class) temp2 on (temp = temp2); + oid +----- +(0 rows) + +--start_ignore +-- Debugging helper (see above). 
+SELECT * from gp_backend_info(); + id | type | content | host | port | pid +----+------+---------+-------------------------+------+------- + -1 | Q | -1 | vanjared-a01.vmware.com | 7000 | 89615 + 0 | w | 0 | vanjared-a01.vmware.com | 7002 | 89619 + 1 | w | 1 | vanjared-a01.vmware.com | 7003 | 89620 + 2 | w | 2 | vanjared-a01.vmware.com | 7004 | 89621 + 3 | r | 0 | vanjared-a01.vmware.com | 7002 | 89625 + 4 | r | 1 | vanjared-a01.vmware.com | 7003 | 89626 + 5 | r | 2 | vanjared-a01.vmware.com | 7004 | 89627 + 6 | R | -1 | vanjared-a01.vmware.com | 7000 | 89629 +(8 rows) + +--end_ignore +-- We should have added only one backend -- the singleton reader on the master. +SELECT COUNT(*) = (:num_primaries * 2 + 2) FROM gp_backend_info(); + ?column? +---------- + t +(1 row) + +SELECT COUNT(*) = :num_primaries FROM gp_backend_info() WHERE type = 'w'; + ?column? +---------- + t +(1 row) + +SELECT COUNT(*) = :num_primaries FROM gp_backend_info() WHERE type = 'r'; + ?column? +---------- + t +(1 row) + +SELECT COUNT(*) = 1 FROM gp_backend_info() WHERE type = 'R' and content = -1; + ?column? +---------- + t +(1 row) + +SELECT COUNT(*) = 1 FROM gp_backend_info() WHERE type = 'Q' and content = -1; + ?column? +---------- + t +(1 row) + +-- IDs and PIDs should still be distinct. +SELECT COUNT(DISTINCT id) = (:num_primaries * 2 + 2) FROM gp_backend_info(); + ?column? +---------- + t +(1 row) + +SELECT COUNT(DISTINCT pid) = (:num_primaries * 2 + 2) FROM gp_backend_info(); + ?column? 
+---------- + t +(1 row) + diff --git a/src/test/regress/expected/motion_socket.out b/src/test/regress/expected/motion_socket.out index cbb6ffeb965..d9b20ac1932 100644 --- a/src/test/regress/expected/motion_socket.out +++ b/src/test/regress/expected/motion_socket.out @@ -32,28 +32,24 @@ else: res = plpy.execute("SELECT address FROM gp_segment_configuration;", 1) hostip = socket.gethostbyname(res[0]['address']) -res = plpy.execute("SELECT current_setting('gp_session_id');", 1) -qd_backend_conn_id = res[0]['current_setting'] +res = plpy.execute("SELECT pid from gp_backend_info();") +pids_to_check = [r['pid'] for r in res] -for process in psutil.process_iter(): - # We iterate through all backends related to connection id - # of current session - # Exclude zombies to avoid psutil.ZombieProcess exceptions - # on calling process.cmdline() - if process.name() == 'postgres' and process.status() != psutil.STATUS_ZOMBIE: - if ' con' + qd_backend_conn_id + ' ' in process.cmdline()[0]: - motion_socket_count = 0 - plpy.info('Checking postgres backend {}'.format(process.cmdline()[0])) - for conn in process.connections(): - if conn.type == expected_socket_kind and conn.raddr == () \ - and conn.laddr.ip == hostip: - motion_socket_count += 1 +for pid in pids_to_check: + # We iterate through all backends related to current session + motion_socket_count = 0 + process = psutil.Process(pid) + plpy.info('Checking postgres backend {}'.format(process.cmdline()[0])) + for conn in process.connections(): + if conn.type == expected_socket_kind and conn.raddr == () \ + and conn.laddr.ip == hostip: + motion_socket_count += 1 - if motion_socket_count != expected_socket_count_per_segment: - plpy.error('Expected {} motion sockets but found {}. '\ - 'For backend process {}. 
connections= {}'\ - .format(expected_socket_count_per_segment, process,\ - motion_socket_count, process.connections())) + if motion_socket_count != expected_socket_count_per_segment: + plpy.error('Expected {} motion sockets but found {}. '\ + 'For backend process {}. connections= {}'\ + .format(expected_socket_count_per_segment, process,\ + motion_socket_count, process.connections())) $$ LANGUAGE plpython3u EXECUTE ON MASTER; diff --git a/src/test/regress/greenplum_schedule b/src/test/regress/greenplum_schedule index a6e4f8bc62d..20f5c34500b 100755 --- a/src/test/regress/greenplum_schedule +++ b/src/test/regress/greenplum_schedule @@ -40,7 +40,7 @@ test: instr_in_shmem_setup test: instr_in_shmem test: createdb -test: gp_aggregates gp_aggregates_costs gp_metadata variadic_parameters default_parameters function_extensions spi gp_xml shared_scan update_gp triggers_gp returning_gp resource_queue_with_rule gp_types gp_index cluster_gp combocid_gp gp_sort gp_prepared_xacts +test: gp_aggregates gp_aggregates_costs gp_metadata variadic_parameters default_parameters function_extensions spi gp_xml shared_scan update_gp triggers_gp returning_gp resource_queue_with_rule gp_types gp_index cluster_gp combocid_gp gp_sort gp_prepared_xacts gp_backend_info test: spi_processed64bit test: gp_tablespace_with_faults # below test(s) inject faults so each of them need to be in a separate group diff --git a/src/test/regress/sql/gp_backend_info.sql b/src/test/regress/sql/gp_backend_info.sql new file mode 100644 index 00000000000..65c1936cbcc --- /dev/null +++ b/src/test/regress/sql/gp_backend_info.sql @@ -0,0 +1,84 @@ +-- Tests for the gp_backend_info() function. + +-- At first there should be no segment backends; we haven't performed any +-- queries yet. There should only be a QD backend +SELECT COUNT(*) = 1 FROM gp_backend_info(); + +SELECT type, content FROM gp_backend_info(); +-- +-- Spin up the writer gang. 
+-- + +CREATE TEMPORARY TABLE temp(); + +--start_ignore +-- Help debugging by printing the results. Since most contents will be different +-- on every machine, we do the actual verification below. +SELECT * from gp_backend_info(); +--end_ignore + +-- Now we should have as many backends as primaries +1 QD, and all primaries +-- backend should be marked as writers +SELECT COUNT(*) AS num_primaries FROM gp_segment_configuration + WHERE content >= 0 AND role = 'p' +\gset +SELECT COUNT(*) = :num_primaries +1 FROM gp_backend_info(); +SELECT COUNT(*) = :num_primaries FROM gp_backend_info() WHERE type = 'w'; +SELECT COUNT(*) = 1 FROM gp_backend_info() WHERE type = 'Q'; + +-- All IDs and PIDs should be distinct. +SELECT COUNT(DISTINCT id) = :num_primaries +1 FROM gp_backend_info(); +SELECT COUNT(DISTINCT content) = :num_primaries +1 FROM gp_backend_info(); +SELECT COUNT(DISTINCT pid) = :num_primaries +1 FROM gp_backend_info(); + +-- +-- Spin up a parallel reader gang. +-- + +CREATE TEMPORARY TABLE temp2(); +SELECT * FROM temp JOIN (SELECT * FROM temp2) temp2 ON (temp = temp2); + +--start_ignore +-- Debugging helper (see above). +SELECT * from gp_backend_info(); +--end_ignore + +-- Now we should have double the number of backends; the new ones should be +-- readers. +SELECT COUNT(*) = (:num_primaries * 2) +1 FROM gp_backend_info(); +SELECT COUNT(*) = :num_primaries FROM gp_backend_info() WHERE type = 'w'; +SELECT COUNT(*) = :num_primaries FROM gp_backend_info() WHERE type = 'r'; +SELECT COUNT(*) = 1 FROM gp_backend_info() WHERE type = 'Q'; + +-- IDs and PIDs should still be distinct. +SELECT COUNT(DISTINCT id) = (:num_primaries * 2) +1 FROM gp_backend_info(); +SELECT COUNT(DISTINCT pid) = (:num_primaries * 2) +1 FROM gp_backend_info(); + +-- Content IDs should be there twice (a reader and a writer for each segment). 
+SELECT COUNT(DISTINCT content) = :num_primaries +1 FROM gp_backend_info(); +SELECT COUNT(DISTINCT content) = :num_primaries FROM gp_backend_info() +WHERE content >= 0; +SELECT DISTINCT COUNT(content) FROM gp_backend_info() WHERE content >= 0 +GROUP BY content; + +-- +-- Start up a singleton reader. +-- + +SELECT * FROM temp JOIN (SELECT oid FROM pg_class) temp2 on (temp = temp2); + +--start_ignore +-- Debugging helper (see above). +SELECT * from gp_backend_info(); +--end_ignore + +-- We should have added only one backend -- the singleton reader on the master. +SELECT COUNT(*) = (:num_primaries * 2 + 2) FROM gp_backend_info(); +SELECT COUNT(*) = :num_primaries FROM gp_backend_info() WHERE type = 'w'; +SELECT COUNT(*) = :num_primaries FROM gp_backend_info() WHERE type = 'r'; +SELECT COUNT(*) = 1 FROM gp_backend_info() WHERE type = 'R' and content = -1; +SELECT COUNT(*) = 1 FROM gp_backend_info() WHERE type = 'Q' and content = -1; + +-- IDs and PIDs should still be distinct. +SELECT COUNT(DISTINCT id) = (:num_primaries * 2 + 2) FROM gp_backend_info(); +SELECT COUNT(DISTINCT pid) = (:num_primaries * 2 + 2) FROM gp_backend_info(); diff --git a/src/test/regress/sql/motion_socket.sql b/src/test/regress/sql/motion_socket.sql index c3c396ad41e..92210e1cb18 100644 --- a/src/test/regress/sql/motion_socket.sql +++ b/src/test/regress/sql/motion_socket.sql @@ -34,28 +34,24 @@ else: res = plpy.execute("SELECT address FROM gp_segment_configuration;", 1) hostip = socket.gethostbyname(res[0]['address']) -res = plpy.execute("SELECT current_setting('gp_session_id');", 1) -qd_backend_conn_id = res[0]['current_setting'] - -for process in psutil.process_iter(): - # We iterate through all backends related to connection id - # of current session - # Exclude zombies to avoid psutil.ZombieProcess exceptions - # on calling process.cmdline() - if process.name() == 'postgres' and process.status() != psutil.STATUS_ZOMBIE: - if ' con' + qd_backend_conn_id + ' ' in process.cmdline()[0]: - 
motion_socket_count = 0 - plpy.info('Checking postgres backend {}'.format(process.cmdline()[0])) - for conn in process.connections(): - if conn.type == expected_socket_kind and conn.raddr == () \ - and conn.laddr.ip == hostip: - motion_socket_count += 1 - - if motion_socket_count != expected_socket_count_per_segment: - plpy.error('Expected {} motion sockets but found {}. '\ - 'For backend process {}. connections= {}'\ - .format(expected_socket_count_per_segment, process,\ - motion_socket_count, process.connections())) +res = plpy.execute("SELECT pid from gp_backend_info();") +pids_to_check = [r['pid'] for r in res] + +for pid in pids_to_check: + # We iterate through all backends related to current session + motion_socket_count = 0 + process = psutil.Process(pid) + plpy.info('Checking postgres backend {}'.format(process.cmdline()[0])) + for conn in process.connections(): + if conn.type == expected_socket_kind and conn.raddr == () \ + and conn.laddr.ip == hostip: + motion_socket_count += 1 + + if motion_socket_count != expected_socket_count_per_segment: + plpy.error('Expected {} motion sockets but found {}. '\ + 'For backend process {}. connections= {}'\ + .format(expected_socket_count_per_segment, process,\ + motion_socket_count, process.connections())) $$ LANGUAGE plpython3u EXECUTE ON MASTER; From cf1cf5b9fd8d0ef71ead63ccc76ce5b16ed8a068 Mon Sep 17 00:00:00 2001 From: Zhenghua Lyu Date: Tue, 17 May 2022 09:18:45 +0800 Subject: [PATCH 11/46] Use hostname instead of ip to compute the host_segments. The global variable host_segments is **only** used on QEs under resource group mode and the value is dispatched to QEs from QD. Previously in the function getCdbComponentInfo(), QD make a hashtable, and count host_segments group by the key of ip address. This is not correct. A typical Greenplum deployment environment may have different ip addresses point to the same machine. 
Use ip address as hash key will lead to wrong number of host_segments and lead to more memory limit of a segment than user's intent. This commit use hostname as a machine's unique identifier to fix the issue. Also change some names to better show the meanings. --- src/backend/cdb/cdbutil.c | 57 ++++++++++++------- src/backend/cdb/dispatcher/cdbdisp_query.c | 2 +- src/backend/cdb/dispatcher/cdbgang.c | 11 ++-- src/backend/cdb/dispatcher/cdbgang_async.c | 2 +- src/backend/executor/execMain.c | 4 +- .../utils/resgroup/resgroup-ops-linux.c | 2 +- src/backend/utils/resgroup/resgroup.c | 14 ++--- src/bin/gpfts/fts.c | 4 +- src/include/cdb/cdbgang.h | 2 +- src/include/postmaster/fts_comm.h | 2 +- src/include/utils/resgroup.h | 2 +- 11 files changed, 59 insertions(+), 43 deletions(-) diff --git a/src/backend/cdb/cdbutil.c b/src/backend/cdb/cdbutil.c index 36f505105d3..237b5603004 100644 --- a/src/backend/cdb/cdbutil.c +++ b/src/backend/cdb/cdbutil.c @@ -95,7 +95,7 @@ static GpSegConfigEntry * readGpSegConfigFromCatalog(int *total_dbs); static GpSegConfigEntry * readGpSegConfigFromFTSFiles(int *total_dbs); static void getAddressesForDBid(GpSegConfigEntry *c, int elevel); -static HTAB *hostSegsHashTableInit(void); +static HTAB *hostPrimaryCountHashTableInit(void); static int nextQEIdentifer(CdbComponentDatabases *cdbs); @@ -109,11 +109,11 @@ typedef struct SegIpEntry char hostinfo[NI_MAXHOST]; } SegIpEntry; -typedef struct HostSegsEntry +typedef struct HostPrimaryCountEntry { - char hostip[INET6_ADDRSTRLEN]; + char hostname[MAXHOSTNAMELEN]; int segmentCount; -} HostSegsEntry; +} HostPrimaryCountEntry; /* * Helper functions for fetching latest gp_segment_configuration outside of @@ -359,7 +359,7 @@ getCdbComponentInfo(void) int total_dbs = 0; bool found; - HostSegsEntry *hsEntry; + HostPrimaryCountEntry *hsEntry; if (!CdbComponentsContext) CdbComponentsContext = AllocSetContextCreate(TopMemoryContext, "cdb components Context", @@ -369,7 +369,7 @@ getCdbComponentInfo(void) 
oldContext = MemoryContextSwitchTo(CdbComponentsContext); - HTAB *hostSegsHash = hostSegsHashTableInit(); + HTAB *hostPrimaryCountHash = hostPrimaryCountHashTableInit(); if (IsTransactionState()) configs = readGpSegConfigFromCatalog(&total_dbs); @@ -394,6 +394,19 @@ getCdbComponentInfo(void) CdbComponentDatabaseInfo *pRow; GpSegConfigEntry *config = &configs[i]; + if (config->hostname == NULL || strlen(config->hostname) > MAXHOSTNAMELEN) + { + /* + * We should never reach here, but add sanity check + * The reason we check length is we find MAXHOSTNAMELEN might be + * smaller than the ones defined in /etc/hosts. Those are rare cases. + */ + elog(ERROR, + "Invalid length (%d) of hostname (%s)", + config->hostname == NULL ? 0 : (int) strlen(config->hostname), + config->hostname == NULL ? "" : config->hostname); + } + /* lookup hostip/hostaddrs cache */ config->hostip= NULL; getAddressesForDBid(config, !am_ftsprobe? ERROR : LOG); @@ -434,10 +447,10 @@ getCdbComponentInfo(void) pRow->numIdleQEs = 0; pRow->numActiveQEs = 0; - if (config->role != GP_SEGMENT_CONFIGURATION_ROLE_PRIMARY || config->hostip == NULL) + if (config->role != GP_SEGMENT_CONFIGURATION_ROLE_PRIMARY) continue; - hsEntry = (HostSegsEntry *) hash_search(hostSegsHash, config->hostip, HASH_ENTER, &found); + hsEntry = (HostPrimaryCountEntry *) hash_search(hostPrimaryCountHash, config->hostname, HASH_ENTER, &found); if (found) hsEntry->segmentCount++; else @@ -551,27 +564,27 @@ getCdbComponentInfo(void) { cdbInfo = &component_databases->segment_db_info[i]; - if (cdbInfo->config->role != GP_SEGMENT_CONFIGURATION_ROLE_PRIMARY || cdbInfo->config->hostip == NULL) + if (cdbInfo->config->role != GP_SEGMENT_CONFIGURATION_ROLE_PRIMARY) continue; - hsEntry = (HostSegsEntry *) hash_search(hostSegsHash, cdbInfo->config->hostip, HASH_FIND, &found); + hsEntry = (HostPrimaryCountEntry *) hash_search(hostPrimaryCountHash, cdbInfo->config->hostname, HASH_FIND, &found); Assert(found); - cdbInfo->hostSegs = 
hsEntry->segmentCount; + cdbInfo->hostPrimaryCount = hsEntry->segmentCount; } for (i = 0; i < component_databases->total_entry_dbs; i++) { cdbInfo = &component_databases->entry_db_info[i]; - if (cdbInfo->config->role != GP_SEGMENT_CONFIGURATION_ROLE_PRIMARY || cdbInfo->config->hostip == NULL) + if (cdbInfo->config->role != GP_SEGMENT_CONFIGURATION_ROLE_PRIMARY) continue; - hsEntry = (HostSegsEntry *) hash_search(hostSegsHash, cdbInfo->config->hostip, HASH_FIND, &found); + hsEntry = (HostPrimaryCountEntry *) hash_search(hostPrimaryCountHash, cdbInfo->config->hostname, HASH_FIND, &found); Assert(found); - cdbInfo->hostSegs = hsEntry->segmentCount; + cdbInfo->hostPrimaryCount = hsEntry->segmentCount; } - hash_destroy(hostSegsHash); + hash_destroy(hostPrimaryCountHash); MemoryContextSwitchTo(oldContext); @@ -1395,18 +1408,18 @@ getAddressesForDBid(GpSegConfigEntry *c, int elevel) } /* - * hostSegsHashTableInit() - * Construct a hash table of HostSegsEntry + * hostPrimaryCountHashTableInit() + * Construct a hash table of HostPrimaryCountEntry */ static HTAB * -hostSegsHashTableInit(void) +hostPrimaryCountHashTableInit(void) { HASHCTL info; /* Set key and entry sizes. 
*/ MemSet(&info, 0, sizeof(info)); - info.keysize = INET6_ADDRSTRLEN; - info.entrysize = sizeof(HostSegsEntry); + info.keysize = MAXHOSTNAMELEN; + info.entrysize = sizeof(HostPrimaryCountEntry); return hash_create("HostSegs", 32, &info, HASH_ELEM | HASH_STRINGS); } @@ -2930,7 +2943,7 @@ getCdbComponentInfo(void) hsEntry = (HostSegsEntry *) hash_search(hostSegsHash, cdbInfo->config->hostip, HASH_FIND, &found); Assert(found); - cdbInfo->hostSegs = hsEntry->segmentCount; + cdbInfo->hostPrimaryCount = hsEntry->segmentCount; } for (i = 0; i < component_databases->total_entry_dbs; i++) @@ -2942,7 +2955,7 @@ getCdbComponentInfo(void) hsEntry = (HostSegsEntry *) hash_search(hostSegsHash, cdbInfo->config->hostip, HASH_FIND, &found); Assert(found); - cdbInfo->hostSegs = hsEntry->segmentCount; + cdbInfo->hostPrimaryCount = hsEntry->segmentCount; } hash_destroy(hostSegsHash); diff --git a/src/backend/cdb/dispatcher/cdbdisp_query.c b/src/backend/cdb/dispatcher/cdbdisp_query.c index 0af87d54884..82420aa91d8 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_query.c +++ b/src/backend/cdb/dispatcher/cdbdisp_query.c @@ -278,7 +278,7 @@ CdbDispatchPlan(struct QueryDesc *queryDesc, * fall back mode (use statement_mem). */ stmt->total_memory_coordinator = ResGroupOps_GetTotalMemory(); - stmt->nsegments_coordinator = ResGroupGetSegmentNum(); + stmt->nsegments_coordinator = ResGroupGetHostPrimaryCount(); } cdbdisp_dispatchX(queryDesc, planRequiresTxn, cancelOnError); diff --git a/src/backend/cdb/dispatcher/cdbgang.c b/src/backend/cdb/dispatcher/cdbgang.c index 4b80b3f3fe2..12cce038e93 100644 --- a/src/backend/cdb/dispatcher/cdbgang.c +++ b/src/backend/cdb/dispatcher/cdbgang.c @@ -61,9 +61,11 @@ int qe_identifier = 0; /* - * number of primary segments on this host + * Number of primary segments on this host. + * Note: This is only set on the segments and not on the coordinator. It is + * used primarily by resource groups. 
*/ -int host_segments = 0; +int host_primary_segment_count = 0; /* * size of hash table of interconnect connections @@ -531,7 +533,7 @@ cdbgang_parse_gpqeid_params(struct Port *port pg_attribute_unused(), if (gpqeid_next_param(&cp, &np)) { - host_segments = (int) strtol(cp, NULL, 10); + host_primary_segment_count = (int) strtol(cp, NULL, 10); } if (gpqeid_next_param(&cp, &np)) @@ -543,7 +545,8 @@ cdbgang_parse_gpqeid_params(struct Port *port pg_attribute_unused(), if (!cp || np) goto bad; - if (gp_session_id <= 0 || PgStartTime <= 0 || qe_identifier < 0 || host_segments <= 0 || ic_htab_size <= 0) + if (gp_session_id <= 0 || PgStartTime <= 0 || qe_identifier < 0 || + host_primary_segment_count <= 0 || ic_htab_size <= 0) goto bad; pfree(gpqeid); diff --git a/src/backend/cdb/dispatcher/cdbgang_async.c b/src/backend/cdb/dispatcher/cdbgang_async.c index 0d65fa58142..e701e40e3ee 100644 --- a/src/backend/cdb/dispatcher/cdbgang_async.c +++ b/src/backend/cdb/dispatcher/cdbgang_async.c @@ -153,7 +153,7 @@ cdbgang_createGang_async(List *segments, SegmentType segmentType) ret = build_gpqeid_param(gpqeid, sizeof(gpqeid), segdbDesc->isWriter, segdbDesc->identifier, - segdbDesc->segment_database_info->hostSegs, + segdbDesc->segment_database_info->hostPrimaryCount, totalSegs * 2); if (!ret) diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 14b167a6f4d..0b96fe96a56 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -274,8 +274,8 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags) should_skip_operator_memory_assign = false; /* Get total system memory on the QE in MB */ - int32 total_memory_segment = ResGroupOps_GetTotalMemory(); - int nsegments_segment = ResGroupGetSegmentNum(); + int total_memory_segment = ResGroupOps_GetTotalMemory(); + int nsegments_segment = ResGroupGetHostPrimaryCount(); uint64 coordinator_query_mem = queryDesc->plannedstmt->query_mem; /* diff --git 
a/src/backend/utils/resgroup/resgroup-ops-linux.c b/src/backend/utils/resgroup/resgroup-ops-linux.c index e35bcf75519..24001a9c570 100644 --- a/src/backend/utils/resgroup/resgroup-ops-linux.c +++ b/src/backend/utils/resgroup/resgroup-ops-linux.c @@ -1760,7 +1760,7 @@ ResGroupOps_SetMemoryLimit(Oid group, int memory_limit) int32 memory_limit_in_chunks; memory_limit_in_chunks = ResGroupGetVmemLimitChunks() * memory_limit / 100; - memory_limit_in_chunks *= ResGroupGetSegmentNum(); + memory_limit_in_chunks *= ResGroupGetHostPrimaryCount(); fd = ResGroupOps_LockGroup(group, comp, true); ResGroupOps_SetMemoryLimitByValue(group, memory_limit_in_chunks); diff --git a/src/backend/utils/resgroup/resgroup.c b/src/backend/utils/resgroup/resgroup.c index a5eb51848e2..a8a830d5dcd 100644 --- a/src/backend/utils/resgroup/resgroup.c +++ b/src/backend/utils/resgroup/resgroup.c @@ -569,7 +569,7 @@ InitResGroups(void) { Assert(IS_QUERY_DISPATCHER()); qdinfo = cdbcomponent_getComponentInfo(MASTER_CONTENT_ID); - pResGroupControl->segmentsOnMaster = qdinfo->hostSegs; + pResGroupControl->segmentsOnMaster = qdinfo->hostPrimaryCount; Assert(pResGroupControl->segmentsOnMaster > 0); } @@ -1074,9 +1074,9 @@ ResGroupGetStat(Oid groupId, ResGroupStatType type) * Get the number of primary segments on this host */ int -ResGroupGetSegmentNum() +ResGroupGetHostPrimaryCount() { - return (Gp_role == GP_ROLE_EXECUTE ? host_segments : pResGroupControl->segmentsOnMaster); + return (Gp_role == GP_ROLE_EXECUTE ? host_primary_segment_count : pResGroupControl->segmentsOnMaster); } static char * @@ -2113,7 +2113,7 @@ decideTotalChunks(int32 *totalChunks, int32 *chunkSizeInBits) int32 tmptotalChunks; int32 tmpchunkSizeInBits; - nsegments = Gp_role == GP_ROLE_EXECUTE ? host_segments : pResGroupControl->segmentsOnMaster; + nsegments = Gp_role == GP_ROLE_EXECUTE ? 
host_primary_segment_count : pResGroupControl->segmentsOnMaster; Assert(nsegments > 0); tmptotalChunks = ResGroupOps_GetTotalMemory() * gp_resource_group_memory_limit / nsegments; @@ -2824,7 +2824,7 @@ SwitchResGroupOnSegment(const char *buf, int len) Assert(group != NULL); /* Init self */ - Assert(host_segments > 0); + Assert(host_primary_segment_count > 0); Assert(caps.concurrency > 0); self->caps = caps; @@ -4194,10 +4194,10 @@ groupMemOnDumpForCgroup(ResGroupData *group, StringInfo str) appendStringInfo(str, "{"); appendStringInfo(str, "\"used\":%d, ", VmemTracker_ConvertVmemChunksToMB( - ResGroupOps_GetMemoryUsage(group->groupId) / ResGroupGetSegmentNum())); + ResGroupOps_GetMemoryUsage(group->groupId) / ResGroupGetHostPrimaryCount())); appendStringInfo(str, "\"limit_granted\":%d", VmemTracker_ConvertVmemChunksToMB( - ResGroupOps_GetMemoryLimit(group->groupId) / ResGroupGetSegmentNum())); + ResGroupOps_GetMemoryLimit(group->groupId) / ResGroupGetHostPrimaryCount())); appendStringInfo(str, "}"); } diff --git a/src/bin/gpfts/fts.c b/src/bin/gpfts/fts.c index 3bac24448d9..eef231fb811 100644 --- a/src/bin/gpfts/fts.c +++ b/src/bin/gpfts/fts.c @@ -748,7 +748,7 @@ initCdbComponentDatabases(GpSegConfigEntry * configs, int total_dbs) } } - cdbInfo->hostSegs = count; + cdbInfo->hostPrimaryCount = count; } for (i = 0; i < cdb_component_dbs->total_segment_dbs; i++) @@ -779,7 +779,7 @@ initCdbComponentDatabases(GpSegConfigEntry * configs, int total_dbs) } } - cdbInfo->hostSegs = count; + cdbInfo->hostPrimaryCount = count; } return; diff --git a/src/include/cdb/cdbgang.h b/src/include/cdb/cdbgang.h index 552263a7cf1..cffa7ecb84f 100644 --- a/src/include/cdb/cdbgang.h +++ b/src/include/cdb/cdbgang.h @@ -46,7 +46,7 @@ typedef struct Gang extern int qe_identifier; -extern int host_segments; +extern int host_primary_segment_count; extern int ic_htab_size; extern MemoryContext GangContext; diff --git a/src/include/postmaster/fts_comm.h b/src/include/postmaster/fts_comm.h index 
abd2c721e4c..3bf4bb5535d 100644 --- a/src/include/postmaster/fts_comm.h +++ b/src/include/postmaster/fts_comm.h @@ -161,7 +161,7 @@ struct CdbComponentDatabaseInfo CdbComponentDatabases *cdbs; /* point to owners */ - int16 hostSegs; /* number of primary segments on the same hosts */ + int16 hostPrimaryCount; /* number of primary segments on the same hosts */ List *freelist; /* list of idle segment dbs */ int numIdleQEs; List *activelist; /* list of active segment dbs */ diff --git a/src/include/utils/resgroup.h b/src/include/utils/resgroup.h index 83a82fbe879..3b7d558b053 100644 --- a/src/include/utils/resgroup.h +++ b/src/include/utils/resgroup.h @@ -206,7 +206,7 @@ extern int64 ResourceGroupGetQueryMemoryLimit(void); extern void ResGroupDumpInfo(StringInfo str); -extern int ResGroupGetSegmentNum(void); +extern int ResGroupGetHostPrimaryCount(void); extern Bitmapset *CpusetToBitset(const char *cpuset, int len); From 5589beb8da2d1e248119b7b8185879fdfcae61f4 Mon Sep 17 00:00:00 2001 From: Daniel Hoffman <43101339+thedanhoffman@users.noreply.github.com> Date: Tue, 17 May 2022 13:09:38 -0500 Subject: [PATCH 12/46] Updated CPhysicalJoin to derive the inner distribution in the case of tainted replicated (#13177) Previously, CPhysicalJoin derived the outer distribution when it was tainted replicated. It checked only for strict replicated and universal replicated and returned the inner distribution in these cases (in this case, it satisfies random). Tainted replicated wasn't considered and was causing an undercount (the JOIN derived tainted replicated instead of random, which was causing the number of columns to be undercounted, because it wrongly assumed that one segment contained all output columns). 
Co-authored-by: Daniel Hoffman --- .../include/gpopt/base/CDistributionSpec.h | 2 +- .../libgpopt/src/operators/CPhysicalJoin.cpp | 1 + src/test/regress/expected/rpt.out | 189 ++++++++++++++++++ src/test/regress/expected/rpt_optimizer.out | 185 +++++++++++++++++ src/test/regress/sql/rpt.sql | 65 ++++++ 5 files changed, 441 insertions(+), 1 deletion(-) diff --git a/src/backend/gporca/libgpopt/include/gpopt/base/CDistributionSpec.h b/src/backend/gporca/libgpopt/include/gpopt/base/CDistributionSpec.h index b50c15e7973..7f1131b6023 100644 --- a/src/backend/gporca/libgpopt/include/gpopt/base/CDistributionSpec.h +++ b/src/backend/gporca/libgpopt/include/gpopt/base/CDistributionSpec.h @@ -41,7 +41,7 @@ class CDistributionSpec : public CPropSpec EdtStrictHashed, // same as hashed, used to force multiple slices for parallel union all. The motions mirror the distribution of the output columns. EdtStrictReplicated, // data is strictly replicated across all segments EdtReplicated, // data is strict or tainted replicated (required only) - EdtTaintedReplicated, // data once-replicated, after being processed by an input-order-sensitive operator (derived only) + EdtTaintedReplicated, // data once-replicated, after being processed by an input-order-sensitive operator or volatile function (derived only) EdtAny, // data can be anywhere on the segments (required only) EdtSingleton, // data is on a single segment or the master EdtStrictSingleton, // data is on a single segment or the master (derived only, only compatible with other singleton distributions) diff --git a/src/backend/gporca/libgpopt/src/operators/CPhysicalJoin.cpp b/src/backend/gporca/libgpopt/src/operators/CPhysicalJoin.cpp index d2bc3c3ca16..e85bf60f96d 100644 --- a/src/backend/gporca/libgpopt/src/operators/CPhysicalJoin.cpp +++ b/src/backend/gporca/libgpopt/src/operators/CPhysicalJoin.cpp @@ -404,6 +404,7 @@ CPhysicalJoin::PdsDerive(CMemoryPool *mp, CExpressionHandle &exprhdl) const CDistributionSpec *pds; if 
(CDistributionSpec::EdtStrictReplicated == pdsOuter->Edt() || + CDistributionSpec::EdtTaintedReplicated == pdsOuter->Edt() || CDistributionSpec::EdtUniversal == pdsOuter->Edt()) { // if outer is replicated/universal, return inner distribution diff --git a/src/test/regress/expected/rpt.out b/src/test/regress/expected/rpt.out index 6584a6180ee..661e47e9b7c 100644 --- a/src/test/regress/expected/rpt.out +++ b/src/test/regress/expected/rpt.out @@ -999,6 +999,195 @@ explain (costs off) select * from t_hashdist cross join (select * from t_replica Optimizer: Postgres query optimizer (9 rows) +-- ORCA +-- verify that JOIN derives the inner child distribution if the outer is tainted replicated (in this +-- case, the inner child is the hash distributed table, but the distribution is random because the +-- hash distribution key is not the JOIN key. we want to return the inner distribution because the +-- JOIN key determines the distribution of the JOIN output). +create table dist_tab (a integer, b integer) distributed by (a); +create table rep_tab (c integer) distributed replicated; +create index idx on dist_tab (b); +insert into dist_tab values (1, 2), (2, 2), (2, 1), (1, 1); +insert into rep_tab values (1), (2); +analyze dist_tab; +analyze rep_tab; +set optimizer_enable_hashjoin=off; +set enable_hashjoin=off; +set enable_nestloop=on; +explain select b from dist_tab where b in (select distinct c from rep_tab); + QUERY PLAN +--------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=10000000001.16..10000000021.44 rows=4 width=4) + -> Nested Loop (cost=10000000001.16..10000000021.39 rows=1 width=4) + -> Unique (cost=10000000001.03..10000000001.04 rows=2 width=4) + Group Key: rep_tab.c + -> Sort (cost=10000000001.03..10000000001.03 rows=2 width=4) + Sort Key: rep_tab.c + -> Seq Scan on rep_tab (cost=10000000000.00..10000000001.02 rows=2 width=4) + -> Index Only Scan using idx on dist_tab 
(cost=0.13..10.16 rows=1 width=4) + Index Cond: (b = rep_tab.c) + Optimizer: Postgres query optimizer +(10 rows) + +select b from dist_tab where b in (select distinct c from rep_tab); + b +--- + 1 + 2 + 1 + 2 +(4 rows) + +reset optimizer_enable_hashjoin; +reset enable_hashjoin; +reset enable_nestloop; +create table rand_tab (d integer) distributed randomly; +insert into rand_tab values (1), (2); +analyze rand_tab; +-- Table Side Derives +-- rep_tab pdsOuter EdtTaintedReplicated +-- rep_tab pdsInner EdtHashed +-- +-- join derives EdtHashed +explain select c from rep_tab where c in (select distinct c from rep_tab); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) (cost=20000000002.14..20000000002.14 rows=3 width=4) + -> Hash Join (cost=20000000001.08..20000000002.14 rows=3 width=4) + Hash Cond: (rep_tab.c = rep_tab_1.c) + -> Seq Scan on rep_tab (cost=10000000000.00..10000000001.02 rows=2 width=4) + -> Hash (cost=10000000001.06..10000000001.06 rows=2 width=4) + -> Unique (cost=10000000001.03..10000000001.04 rows=2 width=4) + Group Key: rep_tab_1.c + -> Sort (cost=10000000001.03..10000000001.03 rows=2 width=4) + Sort Key: rep_tab_1.c + -> Seq Scan on rep_tab rep_tab_1 (cost=10000000000.00..10000000001.02 rows=2 width=4) + Optimizer: Postgres query optimizer +(11 rows) + +select c from rep_tab where c in (select distinct c from rep_tab); + c +--- + 1 + 2 +(2 rows) + +-- Table Side Derives +-- dist_tab pdsOuter EdtHashed +-- rep_tab pdsInner EdtTaintedReplicated +-- +-- join derives EdtHashed +explain select a from dist_tab where a in (select distinct c from rep_tab); + QUERY PLAN +--------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=20000000001.08..20000000002.17 rows=4 width=4) + -> Hash Join (cost=20000000001.08..20000000002.12 rows=1 width=4) + 
Hash Cond: (dist_tab.a = rep_tab.c) + -> Seq Scan on dist_tab (cost=10000000000.00..10000000001.01 rows=1 width=4) + -> Hash (cost=10000000001.06..10000000001.06 rows=2 width=4) + -> Unique (cost=10000000001.03..10000000001.04 rows=2 width=4) + Group Key: rep_tab.c + -> Sort (cost=10000000001.03..10000000001.03 rows=2 width=4) + Sort Key: rep_tab.c + -> Seq Scan on rep_tab (cost=10000000000.00..10000000001.02 rows=2 width=4) + Optimizer: Postgres query optimizer +(11 rows) + +select a from dist_tab where a in (select distinct c from rep_tab); + a +--- + 2 + 2 + 1 + 1 +(4 rows) + +-- Table Side Derives +-- rand_tab pdsOuter EdtRandom +-- rep_tab pdsInner EdtTaintedReplicated +-- +-- join derives EdtRandom +explain select d from rand_tab where d in (select distinct c from rep_tab); + QUERY PLAN +--------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=20000000001.08..20000000002.15 rows=3 width=4) + -> Hash Join (cost=20000000001.08..20000000002.11 rows=1 width=4) + Hash Cond: (rand_tab.d = rep_tab.c) + -> Seq Scan on rand_tab (cost=10000000000.00..10000000001.01 rows=1 width=4) + -> Hash (cost=10000000001.06..10000000001.06 rows=2 width=4) + -> Unique (cost=10000000001.03..10000000001.04 rows=2 width=4) + Group Key: rep_tab.c + -> Sort (cost=10000000001.03..10000000001.03 rows=2 width=4) + Sort Key: rep_tab.c + -> Seq Scan on rep_tab (cost=10000000000.00..10000000001.02 rows=2 width=4) + Optimizer: Postgres query optimizer +(11 rows) + +select d from rand_tab where d in (select distinct c from rep_tab); + d +--- + 1 + 2 +(2 rows) + +-- Table Side Derives +-- rep_tab pdsOuter EdtTaintedReplicated +-- dist_tab pdsInner EdtHashed +-- +-- join derives EdtHashed +explain select c from rep_tab where c in (select distinct a from dist_tab); + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 
(slice1; segments: 3) (cost=20000000001.05..20000000002.14 rows=3 width=4) + -> Hash Join (cost=20000000001.05..20000000002.09 rows=1 width=4) + Hash Cond: (rep_tab.c = dist_tab.a) + -> Seq Scan on rep_tab (cost=10000000000.00..10000000001.02 rows=2 width=4) + -> Hash (cost=10000000001.04..10000000001.04 rows=1 width=4) + -> Unique (cost=10000000001.02..10000000001.03 rows=1 width=4) + Group Key: dist_tab.a + -> Sort (cost=10000000001.02..10000000001.03 rows=1 width=4) + Sort Key: dist_tab.a + -> Seq Scan on dist_tab (cost=10000000000.00..10000000001.01 rows=1 width=4) + Optimizer: Postgres query optimizer +(11 rows) + +select c from rep_tab where c in (select distinct a from dist_tab); + c +--- + 1 + 2 +(2 rows) + +-- Table Side Derives +-- rep_tab pdsOuter EdtTaintedReplicated +-- rand_tab pdsInner EdtHashed +-- +-- join derives EdtHashed +explain select c from rep_tab where c in (select distinct d from rand_tab); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=20000000001.07..20000000002.15 rows=3 width=4) + -> Hash Join (cost=20000000001.07..20000000002.11 rows=1 width=4) + Hash Cond: (rep_tab.c = rand_tab.d) + -> Seq Scan on rep_tab (cost=10000000000.00..10000000001.02 rows=2 width=4) + -> Hash (cost=10000000001.06..10000000001.06 rows=1 width=4) + -> Unique (cost=10000000001.04..10000000001.05 rows=1 width=4) + Group Key: rand_tab.d + -> Sort (cost=10000000001.04..10000000001.05 rows=1 width=4) + Sort Key: rand_tab.d + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=10000000000.00..10000000001.03 rows=1 width=4) + Hash Key: rand_tab.d + -> Seq Scan on rand_tab (cost=10000000000.00..10000000001.01 rows=1 width=4) + Optimizer: Postgres query optimizer +(13 rows) + +select c from rep_tab where c in (select distinct d from rand_tab); + c +--- + 1 + 2 +(2 rows) + -- start_ignore drop schema rpt cascade; NOTICE: 
drop cascades to 7 other objects diff --git a/src/test/regress/expected/rpt_optimizer.out b/src/test/regress/expected/rpt_optimizer.out index 5f3854b66ba..f2e58660a10 100644 --- a/src/test/regress/expected/rpt_optimizer.out +++ b/src/test/regress/expected/rpt_optimizer.out @@ -992,6 +992,191 @@ explain (costs off) select * from t_hashdist cross join (select * from t_replica Optimizer: Pivotal Optimizer (GPORCA) (9 rows) +-- ORCA +-- verify that JOIN derives the inner child distribution if the outer is tainted replicated (in this +-- case, the inner child is the hash distributed table, but the distribution is random because the +-- hash distribution key is not the JOIN key. we want to return the inner distribution because the +-- JOIN key determines the distribution of the JOIN output). +create table dist_tab (a integer, b integer) distributed by (a); +create table rep_tab (c integer) distributed replicated; +create index idx on dist_tab (b); +insert into dist_tab values (1, 2), (2, 2), (2, 1), (1, 1); +insert into rep_tab values (1), (2); +analyze dist_tab; +analyze rep_tab; +set optimizer_enable_hashjoin=off; +set enable_hashjoin=off; +set enable_nestloop=on; +explain select b from dist_tab where b in (select distinct c from rep_tab); + QUERY PLAN +--------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..443.00 rows=4 width=4) + -> Nested Loop (cost=0.00..443.00 rows=2 width=4) + Join Filter: true + -> GroupAggregate (cost=0.00..431.00 rows=2 width=4) + Group Key: rep_tab.c + -> Sort (cost=0.00..431.00 rows=2 width=4) + Sort Key: rep_tab.c + -> Seq Scan on rep_tab (cost=0.00..431.00 rows=2 width=4) + -> Index Scan using idx on dist_tab (cost=0.00..12.00 rows=1 width=4) + Index Cond: (b = rep_tab.c) + Optimizer: Pivotal Optimizer (GPORCA) +(11 rows) + +select b from dist_tab where b in (select distinct c from rep_tab); + b +--- + 1 + 2 + 1 + 2 +(4 rows) + +reset 
optimizer_enable_hashjoin; +reset enable_hashjoin; +reset enable_nestloop; +create table rand_tab (d integer) distributed randomly; +insert into rand_tab values (1), (2); +analyze rand_tab; +-- Table Side Derives +-- rep_tab pdsOuter EdtTaintedReplicated +-- rep_tab pdsInner EdtHashed +-- +-- join derives EdtHashed +explain select c from rep_tab where c in (select distinct c from rep_tab); + QUERY PLAN +------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=2 width=4) + -> Hash Semi Join (cost=0.00..862.00 rows=1 width=4) + Hash Cond: (rep_tab.c = rep_tab_1.c) + -> Result (cost=0.00..431.00 rows=1 width=4) + -> Seq Scan on rep_tab (cost=0.00..431.00 rows=2 width=4) + -> Hash (cost=431.00..431.00 rows=1 width=4) + -> Redistribute Motion 1:3 (slice2; segments: 1) (cost=0.00..431.00 rows=1 width=4) + Hash Key: rep_tab_1.c + -> GroupAggregate (cost=0.00..431.00 rows=6 width=4) + Group Key: rep_tab_1.c + -> Sort (cost=0.00..431.00 rows=6 width=4) + Sort Key: rep_tab_1.c + -> Seq Scan on rep_tab rep_tab_1 (cost=0.00..431.00 rows=6 width=4) + Optimizer: Pivotal Optimizer (GPORCA) +(14 rows) + +select c from rep_tab where c in (select distinct c from rep_tab); + c +--- + 2 + 1 +(2 rows) + +-- Table Side Derives +-- dist_tab pdsOuter EdtHashed +-- rep_tab pdsInner EdtTaintedReplicated +-- +-- join derives EdtHashed +explain select a from dist_tab where a in (select distinct c from rep_tab); + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=4 width=4) + -> Hash Semi Join (cost=0.00..862.00 rows=2 width=4) + Hash Cond: (dist_tab.a = rep_tab.c) + -> Seq Scan on dist_tab (cost=0.00..431.00 rows=2 width=4) + -> Hash (cost=431.00..431.00 rows=2 width=4) + -> Seq Scan on rep_tab (cost=0.00..431.00 rows=2 width=4) + Optimizer: Pivotal Optimizer (GPORCA) +(7 
rows) + +select a from dist_tab where a in (select distinct c from rep_tab); + a +--- + 1 + 1 + 2 + 2 +(4 rows) + +-- Table Side Derives +-- rand_tab pdsOuter EdtRandom +-- rep_tab pdsInner EdtTaintedReplicated +-- +-- join derives EdtRandom +explain select d from rand_tab where d in (select distinct c from rep_tab); + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=2 width=4) + -> Hash Semi Join (cost=0.00..862.00 rows=1 width=4) + Hash Cond: (rand_tab.d = rep_tab.c) + -> Seq Scan on rand_tab (cost=0.00..431.00 rows=1 width=4) + -> Hash (cost=431.00..431.00 rows=2 width=4) + -> Seq Scan on rep_tab (cost=0.00..431.00 rows=2 width=4) + Optimizer: Pivotal Optimizer (GPORCA) +(7 rows) + +select d from rand_tab where d in (select distinct c from rep_tab); + d +--- + 1 + 2 +(2 rows) + +-- Table Side Derives +-- rep_tab pdsOuter EdtTaintedReplicated +-- dist_tab pdsInner EdtHashed +-- +-- join derives EdtHashed +explain select c from rep_tab where c in (select distinct a from dist_tab); + QUERY PLAN +---------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=2 width=4) + -> Hash Join (cost=0.00..862.00 rows=1 width=4) + Hash Cond: (dist_tab.a = rep_tab.c) + -> GroupAggregate (cost=0.00..431.00 rows=1 width=4) + Group Key: dist_tab.a + -> Sort (cost=0.00..431.00 rows=2 width=4) + Sort Key: dist_tab.a + -> Seq Scan on dist_tab (cost=0.00..431.00 rows=2 width=4) + -> Hash (cost=431.00..431.00 rows=2 width=4) + -> Seq Scan on rep_tab (cost=0.00..431.00 rows=2 width=4) + Optimizer: Pivotal Optimizer (GPORCA) +(11 rows) + +select c from rep_tab where c in (select distinct a from dist_tab); + c +--- + 1 + 2 +(2 rows) + +-- Table Side Derives +-- rep_tab pdsOuter EdtTaintedReplicated +-- rand_tab pdsInner EdtHashed +-- +-- join derives EdtHashed +explain select c from rep_tab where c 
in (select distinct d from rand_tab); + QUERY PLAN +------------------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=2 width=4) + -> Hash Join (cost=0.00..862.00 rows=1 width=4) + Hash Cond: (rand_tab.d = rep_tab.c) + -> GroupAggregate (cost=0.00..431.00 rows=1 width=4) + Group Key: rand_tab.d + -> Sort (cost=0.00..431.00 rows=1 width=4) + Sort Key: rand_tab.d + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4) + Hash Key: rand_tab.d + -> Seq Scan on rand_tab (cost=0.00..431.00 rows=1 width=4) + -> Hash (cost=431.00..431.00 rows=2 width=4) + -> Seq Scan on rep_tab (cost=0.00..431.00 rows=2 width=4) + Optimizer: Pivotal Optimizer (GPORCA) +(13 rows) + +select c from rep_tab where c in (select distinct d from rand_tab); + c +--- + 2 + 1 +(2 rows) + -- start_ignore drop schema rpt cascade; NOTICE: drop cascades to 7 other objects diff --git a/src/test/regress/sql/rpt.sql b/src/test/regress/sql/rpt.sql index 1912101f330..bc8bb59d2b3 100644 --- a/src/test/regress/sql/rpt.sql +++ b/src/test/regress/sql/rpt.sql @@ -439,6 +439,71 @@ explain (costs off) update t_replicate_volatile set a = random(); explain (costs off) insert into t_replicate_volatile select * from t_replicate_volatile limit random(); explain (costs off) select * from t_hashdist cross join (select * from t_replicate_volatile limit random()) x; +-- ORCA +-- verify that JOIN derives the inner child distribution if the outer is tainted replicated (in this +-- case, the inner child is the hash distributed table, but the distribution is random because the +-- hash distribution key is not the JOIN key. we want to return the inner distribution because the +-- JOIN key determines the distribution of the JOIN output). 
+create table dist_tab (a integer, b integer) distributed by (a); +create table rep_tab (c integer) distributed replicated; +create index idx on dist_tab (b); +insert into dist_tab values (1, 2), (2, 2), (2, 1), (1, 1); +insert into rep_tab values (1), (2); +analyze dist_tab; +analyze rep_tab; +set optimizer_enable_hashjoin=off; +set enable_hashjoin=off; +set enable_nestloop=on; +explain select b from dist_tab where b in (select distinct c from rep_tab); +select b from dist_tab where b in (select distinct c from rep_tab); +reset optimizer_enable_hashjoin; +reset enable_hashjoin; +reset enable_nestloop; + +create table rand_tab (d integer) distributed randomly; +insert into rand_tab values (1), (2); +analyze rand_tab; + +-- Table Side Derives +-- rep_tab pdsOuter EdtTaintedReplicated +-- rep_tab pdsInner EdtHashed +-- +-- join derives EdtHashed +explain select c from rep_tab where c in (select distinct c from rep_tab); +select c from rep_tab where c in (select distinct c from rep_tab); + +-- Table Side Derives +-- dist_tab pdsOuter EdtHashed +-- rep_tab pdsInner EdtTaintedReplicated +-- +-- join derives EdtHashed +explain select a from dist_tab where a in (select distinct c from rep_tab); +select a from dist_tab where a in (select distinct c from rep_tab); + +-- Table Side Derives +-- rand_tab pdsOuter EdtRandom +-- rep_tab pdsInner EdtTaintedReplicated +-- +-- join derives EdtRandom +explain select d from rand_tab where d in (select distinct c from rep_tab); +select d from rand_tab where d in (select distinct c from rep_tab); + +-- Table Side Derives +-- rep_tab pdsOuter EdtTaintedReplicated +-- dist_tab pdsInner EdtHashed +-- +-- join derives EdtHashed +explain select c from rep_tab where c in (select distinct a from dist_tab); +select c from rep_tab where c in (select distinct a from dist_tab); + +-- Table Side Derives +-- rep_tab pdsOuter EdtTaintedReplicated +-- rand_tab pdsInner EdtHashed +-- +-- join derives EdtHashed +explain select c from rep_tab where c in 
(select distinct d from rand_tab); +select c from rep_tab where c in (select distinct d from rand_tab); + -- start_ignore drop schema rpt cascade; -- end_ignore From 0fb6285833db05a4ea24f6b5eb5f977c42470c89 Mon Sep 17 00:00:00 2001 From: Zhenghua Lyu Date: Mon, 23 May 2022 14:26:32 +0800 Subject: [PATCH 13/46] Do not turn to singleQE for SegmentGeneral path refs outer Params. In planner, if a SegmentGeneral path contains volatile expressions, it cannot be taken as General, and we will try to make it SingleQE by adding a motion (if this motion is not needed, it will be removed later). But a corner case is that if the path refs outer Params then it cannot be motion-ed. This commit fixes the issue by not trying to bring a SegmentGeneral path that refs outer Params to SingleQE. See Github Issue 13532 for details. --- src/backend/cdb/cdbpath.c | 6 ++++ src/test/regress/expected/rpt.out | 36 +++++++++++++++++++++ src/test/regress/expected/rpt_optimizer.out | 31 ++++++++++++++++++ src/test/regress/sql/rpt.sql | 8 +++++ 4 files changed, 81 insertions(+) diff --git a/src/backend/cdb/cdbpath.c b/src/backend/cdb/cdbpath.c index 4d8235b6334..6c23fb90ce3 100644 --- a/src/backend/cdb/cdbpath.c +++ b/src/backend/cdb/cdbpath.c @@ -2819,6 +2819,12 @@ turn_volatile_seggen_to_singleqe(PlannerInfo *root, Path *path, Node *node) CdbPathLocus_MakeSingleQE(&singleQE, CdbPathLocus_NumSegments(path->locus)); mpath = cdbpath_create_motion_path(root, path, NIL, false, singleQE); + /* + * mpath might be NULL, e.g., when the path contains outer Params + * See Github Issue 13532 for details. 
+ */ + if (mpath == NULL) + return path; ppath = create_projection_path_with_quals(root, mpath->parent, mpath, mpath->pathtarget, NIL, false); ppath->force = true; diff --git a/src/test/regress/expected/rpt.out b/src/test/regress/expected/rpt.out index 661e47e9b7c..a3d386c5ac7 100644 --- a/src/test/regress/expected/rpt.out +++ b/src/test/regress/expected/rpt.out @@ -1188,6 +1188,42 @@ select c from rep_tab where c in (select distinct d from rand_tab); 2 (2 rows) +-- Github Issue 13532 +create table t1_13532(a int, b int) distributed replicated; +create table t2_13532(a int, b int) distributed replicated; +create index idx_t2_13532 on t2_13532(b); +explain (costs off) select * from t1_13532 x, t2_13532 y where y.a < random() and x.b = y.b; + QUERY PLAN +---------------------------------------------------------------------- + Hash Join + Hash Cond: (x.b = y.b) + -> Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on t1_13532 x + -> Hash + -> Result + -> Gather Motion 1:1 (slice2; segments: 1) + -> Bitmap Heap Scan on t2_13532 y + Filter: ((a)::double precision < random()) + -> Bitmap Index Scan on idx_t2_13532 + Optimizer: Postgres query optimizer +(11 rows) + +set enable_bitmapscan = off; +explain (costs off) select * from t1_13532 x, t2_13532 y where y.a < random() and x.b = y.b; + QUERY PLAN +---------------------------------------------------------------------- + Hash Join + Hash Cond: (x.b = y.b) + -> Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on t1_13532 x + -> Hash + -> Result + -> Gather Motion 1:1 (slice2; segments: 1) + -> Index Scan using idx_t2_13532 on t2_13532 y + Filter: ((a)::double precision < random()) + Optimizer: Postgres query optimizer +(10 rows) + -- start_ignore drop schema rpt cascade; NOTICE: drop cascades to 7 other objects diff --git a/src/test/regress/expected/rpt_optimizer.out b/src/test/regress/expected/rpt_optimizer.out index f2e58660a10..757fcd4d2ea 100644 --- a/src/test/regress/expected/rpt_optimizer.out +++ 
b/src/test/regress/expected/rpt_optimizer.out @@ -1177,6 +1177,37 @@ select c from rep_tab where c in (select distinct d from rand_tab); 1 (2 rows) +-- Github Issue 13532 +create table t1_13532(a int, b int) distributed replicated; +create table t2_13532(a int, b int) distributed replicated; +create index idx_t2_13532 on t2_13532(b); +explain (costs off) select * from t1_13532 x, t2_13532 y where y.a < random() and x.b = y.b; + QUERY PLAN +---------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Nested Loop + Join Filter: true + -> Seq Scan on t1_13532 + -> Index Scan using idx_t2_13532 on t2_13532 + Index Cond: (b = t1_13532.b) + Filter: ((a)::double precision < random()) + Optimizer: Pivotal Optimizer (GPORCA) +(8 rows) + +set enable_bitmapscan = off; +explain (costs off) select * from t1_13532 x, t2_13532 y where y.a < random() and x.b = y.b; + QUERY PLAN +---------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Nested Loop + Join Filter: true + -> Seq Scan on t1_13532 + -> Index Scan using idx_t2_13532 on t2_13532 + Index Cond: (b = t1_13532.b) + Filter: ((a)::double precision < random()) + Optimizer: Pivotal Optimizer (GPORCA) +(8 rows) + -- start_ignore drop schema rpt cascade; NOTICE: drop cascades to 7 other objects diff --git a/src/test/regress/sql/rpt.sql b/src/test/regress/sql/rpt.sql index bc8bb59d2b3..42ae58242b8 100644 --- a/src/test/regress/sql/rpt.sql +++ b/src/test/regress/sql/rpt.sql @@ -504,6 +504,14 @@ select c from rep_tab where c in (select distinct a from dist_tab); explain select c from rep_tab where c in (select distinct d from rand_tab); select c from rep_tab where c in (select distinct d from rand_tab); +-- Github Issue 13532 +create table t1_13532(a int, b int) distributed replicated; +create table t2_13532(a int, b int) distributed replicated; +create index idx_t2_13532 on t2_13532(b); +explain (costs off) select * from t1_13532 x, t2_13532 
y where y.a < random() and x.b = y.b; +set enable_bitmapscan = off; +explain (costs off) select * from t1_13532 x, t2_13532 y where y.a < random() and x.b = y.b; + -- start_ignore drop schema rpt cascade; -- end_ignore From 0ed7e024e488e30a1285f26a3de1ab3226f1cb25 Mon Sep 17 00:00:00 2001 From: tangtao Date: Mon, 16 May 2022 20:20:48 +0800 Subject: [PATCH 14/46] reject ambiguous 5-digit date in non-standard YYYMMDD format The 5-digit date string was invalid and would be rejected on GPDB5. But then the upstream pg modified the date parsing logic, which would make it parsed as YYYMMDD. As it's not a standard time format and the change causes gp6+ to behave differently from previous versions, this commit lets gp reject it by default. And if the pg-like date parsing is required, we can set the value of GUC gp_allow_date_field_width_5digits to true. --- src/backend/utils/adt/datetime.c | 3 ++ src/backend/utils/misc/guc_gp.c | 13 +++++++++ src/include/utils/guc.h | 2 ++ src/include/utils/sync_guc_name.h | 1 + src/test/regress/expected/date.out | 45 ++++++++++++++++++++++++++++++ src/test/regress/sql/date.sql | 33 ++++++++++++++++++++++ 6 files changed, 97 insertions(+) diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c index 67a4e727058..841013b5d3c 100644 --- a/src/backend/utils/adt/datetime.c +++ b/src/backend/utils/adt/datetime.c @@ -27,6 +27,7 @@ #include "nodes/nodeFuncs.h" #include "utils/builtins.h" #include "utils/date.h" +#include "utils/guc.h" #include "utils/datetime.h" #include "utils/memutils.h" #include "utils/tzparser.h" @@ -2904,6 +2905,8 @@ DecodeNumberField(int len, char *str, int fmask, tm->tm_year = atoi(str); if ((len - 4) == 2) *is2digits = true; + else if (((len - 4 ) == 3) && !gp_allow_date_field_width_5digits) + return DTERR_BAD_FORMAT; return DTK_DATE; } diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index b2c25f00360..5e17c324760 100644 --- a/src/backend/utils/misc/guc_gp.c +++ 
b/src/backend/utils/misc/guc_gp.c @@ -439,6 +439,9 @@ bool enable_answer_query_using_materialized_views = false; bool gp_log_endpoints = false; +/* optional reject to parse ambigous 5-digits date in YYYMMDD format */ +bool gp_allow_date_field_width_5digits = false; + static const struct config_enum_entry gp_log_format_options[] = { {"text", 0}, {"csv", 1}, @@ -2886,6 +2889,16 @@ struct config_bool ConfigureNamesBool_gp[] = NULL, NULL, NULL }, + { + {"gp_allow_date_field_width_5digits", PGC_USERSET, COMPAT_OPTIONS_PREVIOUS, + gettext_noop("Allow parsing input date field with exactly continous 5 digits in non-standard YYYMMDD timeformat (follow pg12+ behave)"), + NULL, + GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE + }, + &gp_allow_date_field_width_5digits, + false, + NULL, NULL, NULL + }, { {"optimizer_enable_eageragg", PGC_USERSET, DEVELOPER_OPTIONS, gettext_noop("Enable Eager Agg transform for pushing aggregate below an innerjoin."), diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index 4cd336f4909..ad247dd940d 100644 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -624,6 +624,8 @@ extern int gp_predicate_pushdown_sample_rows; extern bool gp_log_endpoints; +extern bool gp_allow_date_field_width_5digits; + typedef enum { INDEX_CHECK_NONE, diff --git a/src/include/utils/sync_guc_name.h b/src/include/utils/sync_guc_name.h index 02419fc8c22..e0d20bbef65 100644 --- a/src/include/utils/sync_guc_name.h +++ b/src/include/utils/sync_guc_name.h @@ -27,6 +27,7 @@ "force_parallel_mode", "gin_fuzzy_search_limit", "gin_pending_list_limit", + "gp_allow_date_field_width_5digits", "gp_blockdirectory_entry_min_range", "gp_blockdirectory_minipage_size", "gp_debug_linger", diff --git a/src/test/regress/expected/date.out b/src/test/regress/expected/date.out index c8b0566ff40..79be471d758 100644 --- a/src/test/regress/expected/date.out +++ b/src/test/regress/expected/date.out @@ -316,6 +316,21 @@ SELECT date '1999 08 01'; 1999-08-01 (1 row) +-- Test guc 
gp_allow_date_field_width_5digits +-- should error out +SELECT date '2020516'; +ERROR: invalid input syntax for type date: "2020516" +LINE 1: SELECT date '2020516'; + ^ +SET gp_allow_date_field_width_5digits=on; +-- should parsed to 0202-05-16 ( non-standard YYYMMDD ) +SELECT date '2020516'; + date +------------ + 0202-05-16 +(1 row) + +RESET gp_allow_date_field_width_5digits; SET datestyle TO dmy; SELECT date 'January 8, 1999'; date @@ -568,6 +583,21 @@ SELECT date '1999 08 01'; 1999-08-01 (1 row) +-- Test guc gp_allow_date_field_width_5digits +-- should error out +SELECT date '2020516'; +ERROR: invalid input syntax for type date: "2020516" +LINE 1: SELECT date '2020516'; + ^ +SET gp_allow_date_field_width_5digits=on; +-- should parsed to 0202-05-16 ( non-standard YYYMMDD ) +SELECT date '2020516'; + date +------------ + 0202-05-16 +(1 row) + +RESET gp_allow_date_field_width_5digits; SET datestyle TO mdy; SELECT date 'January 8, 1999'; date @@ -840,6 +870,21 @@ SELECT date '5874898-01-01'; -- out of range ERROR: date out of range: "5874898-01-01" LINE 1: SELECT date '5874898-01-01'; ^ +-- Test guc gp_allow_date_field_width_5digits +-- should error out +SELECT date '2020516'; +ERROR: invalid input syntax for type date: "2020516" +LINE 1: SELECT date '2020516'; + ^ +SET gp_allow_date_field_width_5digits=on; +-- should parsed to 0202-05-16 ( non-standard YYYMMDD ) +SELECT date '2020516'; + date +------------ + 0202-05-16 +(1 row) + +RESET gp_allow_date_field_width_5digits; RESET datestyle; -- -- Simple math diff --git a/src/test/regress/sql/date.sql b/src/test/regress/sql/date.sql index 8f7435b767c..3c97b79e71c 100644 --- a/src/test/regress/sql/date.sql +++ b/src/test/regress/sql/date.sql @@ -85,6 +85,17 @@ SELECT date '01 08 1999'; SELECT date '99 08 01'; SELECT date '1999 08 01'; +-- Test guc gp_allow_date_field_width_5digits +-- should error out +SELECT date '2020516'; + +SET gp_allow_date_field_width_5digits=on; + +-- should parsed to 0202-05-16 ( non-standard 
YYYMMDD ) +SELECT date '2020516'; + +RESET gp_allow_date_field_width_5digits; + SET datestyle TO dmy; SELECT date 'January 8, 1999'; @@ -136,6 +147,17 @@ SELECT date '01 08 1999'; SELECT date '99 08 01'; SELECT date '1999 08 01'; +-- Test guc gp_allow_date_field_width_5digits +-- should error out +SELECT date '2020516'; + +SET gp_allow_date_field_width_5digits=on; + +-- should parsed to 0202-05-16 ( non-standard YYYMMDD ) +SELECT date '2020516'; + +RESET gp_allow_date_field_width_5digits; + SET datestyle TO mdy; SELECT date 'January 8, 1999'; @@ -193,6 +215,17 @@ SELECT date '4714-11-23 BC'; -- out of range SELECT date '5874897-12-31'; SELECT date '5874898-01-01'; -- out of range +-- Test guc gp_allow_date_field_width_5digits +-- should error out +SELECT date '2020516'; + +SET gp_allow_date_field_width_5digits=on; + +-- should parsed to 0202-05-16 ( non-standard YYYMMDD ) +SELECT date '2020516'; + +RESET gp_allow_date_field_width_5digits; + RESET datestyle; -- From 835fae015ded69e17c3c77323a38b5a3cb1bcb20 Mon Sep 17 00:00:00 2001 From: Aegeaner Date: Wed, 25 May 2022 15:53:48 +0800 Subject: [PATCH 15/46] Handling exception to call mppExecutorCleanup in standard_ExecutorStart (#12694) According to the reported error "PolicyEagerFreeAssignOperatorMemoryKB makes query end without calling mppExecutorCleanup" (#12690), the code path in `standard_ExecutorStart` didn't handle exceptions from the `PolicyAutoAssignOperatorMemoryKB` and `PolicyEagerFreeAssignOperatorMemoryKB` calls, which may cause the OOM exception not to be handled in `standard_ExecutorStart` but thrown to the upper `PortalStart` methods; while there is also an exception handling mechanism in `PortalStart`, `mppExecutorCleanup` will not be called there because `portal->queryDesc` will be `NULL` in certain transaction states. This commit fixes it. 
--- src/backend/executor/execMain.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 0b96fe96a56..609529bc703 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -303,20 +303,29 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags) if (!should_skip_operator_memory_assign) { - switch(*gp_resmanager_memory_policy) + PG_TRY(); { - case RESMANAGER_MEMORY_POLICY_AUTO: - PolicyAutoAssignOperatorMemoryKB(queryDesc->plannedstmt, + switch(*gp_resmanager_memory_policy) + { + case RESMANAGER_MEMORY_POLICY_AUTO: + PolicyAutoAssignOperatorMemoryKB(queryDesc->plannedstmt, queryDesc->plannedstmt->query_mem); - break; - case RESMANAGER_MEMORY_POLICY_EAGER_FREE: - PolicyEagerFreeAssignOperatorMemoryKB(queryDesc->plannedstmt, + break; + case RESMANAGER_MEMORY_POLICY_EAGER_FREE: + PolicyEagerFreeAssignOperatorMemoryKB(queryDesc->plannedstmt, queryDesc->plannedstmt->query_mem); - break; - default: - Assert(IsResManagerMemoryPolicyNone()); - break; + break; + default: + Assert(IsResManagerMemoryPolicyNone()); + break; + } + } + PG_CATCH(); + { + mppExecutorCleanup(queryDesc); + PG_RE_THROW(); } + PG_END_TRY(); } } From 8d51113903319611588a7e914d0e0aa155d71f6d Mon Sep 17 00:00:00 2001 From: Soumyadeep Chakraborty Date: Thu, 2 Jun 2022 09:25:15 -0700 Subject: [PATCH 16/46] Support both unicast and wildcard address binding 790c7bac695 changed our address binding strategy to use a unicast address (segment's gp_segment_configuration.address) instead of the wildcard address, to reduce port usage on segment hosts and to ensure that we don't inadvertently use a slower network interface for interconnect traffic. In some cases, inter-segment communication using the unicast address mentioned above, may not be possible. 
One such example is if the source segment's address field and the destination segment's address field are on different subnets and/or existing routing rules don't allow for such communication. In these cases, using a wildcard address for address binding is the only available fallback, enabling the use of any network interface compliant with routing rules. Thus, this commit introduces the gp_interconnect_address_type GUC to support both kinds of address binding. We pick the default to be "unicast", as that is the only reasonable way to ensure that the segment's address field is used for fast interconnect communication and to keep port usage manageable on large clusters with highly concurrent workloads. Testing notes: VM setup: one coordinator node, two segment nodes. All nodes are connected through three networks. Gp segment config: coordinator node has one coordinator. Each segment node has two primaries. No mirrors. Coordinator uses a dedicated network. Two primaries on a segment node each uses one of the other two networks. With 'unicast', we fail to send packets due to the network structure: WARNING: interconnect may encountered a network error, please check your network Falling back to 'wildcard', we see that packets can be sent successfully across motions. 
Co-authored-by: Huansong Fu --- contrib/interconnect/tcp/ic_tcp.c | 25 +++++++++++++++---------- contrib/interconnect/udp/ic_udpifc.c | 25 +++++++++++++++---------- src/backend/cdb/cdbutil.c | 18 ++++++++++++++++++ src/backend/cdb/cdbvars.c | 1 + src/backend/utils/misc/guc_gp.c | 16 ++++++++++++++++ src/include/cdb/cdbvars.h | 27 +++++++++++++++++++++++++++ src/include/utils/sync_guc_name.h | 1 + 7 files changed, 93 insertions(+), 20 deletions(-) diff --git a/contrib/interconnect/tcp/ic_tcp.c b/contrib/interconnect/tcp/ic_tcp.c index 3306f25467e..07926e61c32 100644 --- a/contrib/interconnect/tcp/ic_tcp.c +++ b/contrib/interconnect/tcp/ic_tcp.c @@ -143,16 +143,21 @@ setupTCPListeningSocket(int backlog, int *listenerSocketFd, int32 *listenerPort) hints.ai_protocol = 0; /* Any protocol - TCP implied for network use * due to SOCK_STREAM */ - /* - * Restrict what IP address we will listen on to just the one that was - * used to create this QE session. - */ - Assert(interconnect_address && strlen(interconnect_address) > 0); - hints.ai_flags |= AI_NUMERICHOST; - if (gp_log_interconnect >= GPVARS_VERBOSITY_DEBUG) - ereport(DEBUG1, - (errmsg("getaddrinfo called with interconnect_address %s", - interconnect_address))); + if (Gp_interconnect_address_type == INTERCONNECT_ADDRESS_TYPE_UNICAST) + { + Assert(interconnect_address && strlen(interconnect_address) > 0); + hints.ai_flags |= AI_NUMERICHOST; + ereportif(gp_log_interconnect >= GPVARS_VERBOSITY_DEBUG, DEBUG3, + (errmsg("getaddrinfo called with unicast address: %s", + interconnect_address))); + } + else + { + Assert(interconnect_address == NULL); + hints.ai_flags |= AI_PASSIVE; + ereportif(gp_log_interconnect >= GPVARS_VERBOSITY_DEBUG, DEBUG3, + (errmsg("getaddrinfo called with wildcard address"))); + } s = getaddrinfo(interconnect_address, service, &hints, &addrs); if (s != 0) diff --git a/contrib/interconnect/udp/ic_udpifc.c b/contrib/interconnect/udp/ic_udpifc.c index 4d4a3ca8f82..5c6331448b4 100644 --- 
a/contrib/interconnect/udp/ic_udpifc.c +++ b/contrib/interconnect/udp/ic_udpifc.c @@ -1201,16 +1201,21 @@ setupUDPListeningSocket(int *listenerSocketFd, int32 *listenerPort, int *txFamil #endif fun = "getaddrinfo"; - /* - * Restrict what IP address we will listen on to just the one that was - * used to create this QE session. - */ - Assert(interconnect_address && strlen(interconnect_address) > 0); - hints.ai_flags |= AI_NUMERICHOST; - if (gp_log_interconnect >= GPVARS_VERBOSITY_DEBUG) - ereport(DEBUG1, - (errmsg("getaddrinfo called with interconnect_address %s", - interconnect_address))); + if (Gp_interconnect_address_type == INTERCONNECT_ADDRESS_TYPE_UNICAST) + { + Assert(interconnect_address && strlen(interconnect_address) > 0); + hints.ai_flags |= AI_NUMERICHOST; + ereportif(gp_log_interconnect >= GPVARS_VERBOSITY_DEBUG, DEBUG3, + (errmsg("getaddrinfo called with unicast address: %s", + interconnect_address))); + } + else + { + Assert(interconnect_address == NULL); + hints.ai_flags |= AI_PASSIVE; + ereportif(gp_log_interconnect >= GPVARS_VERBOSITY_DEBUG, DEBUG3, + (errmsg("getaddrinfo called with wildcard address"))); + } s = getaddrinfo(interconnect_address, service, &hints, &addrs); if (s != 0) diff --git a/src/backend/cdb/cdbutil.c b/src/backend/cdb/cdbutil.c index 237b5603004..938b13b03c8 100644 --- a/src/backend/cdb/cdbutil.c +++ b/src/backend/cdb/cdbutil.c @@ -1035,9 +1035,27 @@ cdbcomponent_getComponentInfo(int contentId) static void ensureInterconnectAddress(void) { + /* + * If the address type is wildcard, there is no need to populate an unicast + * address in interconnect_address. + */ + if (Gp_interconnect_address_type == INTERCONNECT_ADDRESS_TYPE_WILDCARD) + { + interconnect_address = NULL; + return; + } + + Assert(Gp_interconnect_address_type == INTERCONNECT_ADDRESS_TYPE_UNICAST); + + /* If the unicast address has already been assigned, exit early. 
*/ if (interconnect_address) return; + /* + * Retrieve the segment's gp_segment_configuration.address value, in order + * to setup interconnect_address + */ + if (GpIdentity.segindex >= 0) { Assert(Gp_role == GP_ROLE_EXECUTE); diff --git a/src/backend/cdb/cdbvars.c b/src/backend/cdb/cdbvars.c index 3698914b02d..49a5b46dcbe 100644 --- a/src/backend/cdb/cdbvars.c +++ b/src/backend/cdb/cdbvars.c @@ -209,6 +209,7 @@ int Gp_interconnect_debug_retry_interval = 10; int interconnect_setup_timeout = 7200; int Gp_interconnect_type = INTERCONNECT_TYPE_UDPIFC; +int Gp_interconnect_address_type = INTERCONNECT_ADDRESS_TYPE_UNICAST; bool gp_interconnect_aggressive_retry = true; /* fast-track app-level * retry */ diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index 5e17c324760..1e0a883a0bc 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -532,6 +532,12 @@ static const struct config_enum_entry gp_interconnect_types[] = { {NULL, 0} }; +static const struct config_enum_entry gp_interconnect_address_types[] = { + {"wildcard", INTERCONNECT_ADDRESS_TYPE_WILDCARD}, + {"unicast", INTERCONNECT_ADDRESS_TYPE_UNICAST}, + {NULL, 0} +}; + static const struct config_enum_entry gp_log_verbosity[] = { {"terse", GPVARS_VERBOSITY_TERSE}, {"off", GPVARS_VERBOSITY_OFF}, @@ -4907,6 +4913,16 @@ struct config_enum ConfigureNamesEnum_gp[] = NULL, NULL, NULL }, + { + {"gp_interconnect_address_type", PGC_BACKEND, GP_ARRAY_TUNING, + gettext_noop("Sets the interconnect address type used for inter-node communication."), + gettext_noop("Valid values are \"unicast\" and \"wildcard\"") + }, + &Gp_interconnect_address_type, + INTERCONNECT_ADDRESS_TYPE_UNICAST, gp_interconnect_address_types, + NULL, NULL, NULL + }, + { {"gp_log_fts", PGC_SIGHUP, LOGGING_WHAT, gettext_noop("Sets the verbosity of logged messages pertaining to fault probing."), diff --git a/src/include/cdb/cdbvars.h b/src/include/cdb/cdbvars.h index b2f3e52c05e..86781bf424b 100644 --- 
a/src/include/cdb/cdbvars.h +++ b/src/include/cdb/cdbvars.h @@ -308,6 +308,33 @@ typedef enum GpVars_Interconnect_Type extern int Gp_interconnect_type; +/* + * We support different strategies for address binding for sockets used for + * motion communication over the interconnect. + * + * One approach is to use an unicast address, specifically the segment's + * gp_segment_configuration.address field to perform the address binding. This + * has the benefits of reducing port usage on a segment host and ensures that + * the NIC backed by the address field is the only one used for communication + * (and not an externally facing slower NIC, like the ones that typically back + * the gp_segment_configuration.hostname field) + * + * In some cases, inter-segment communication using the unicast address + * mentioned above, may not be possible. One such example is if the source + * segment's address field and the destination segment's address field are on + * different subnets and/or existing routing rules don't allow for such + * communication. In these cases, using a wildcard address for address binding + * is the only available fallback, enabling the use of any network interface + * compliant with routing rules. 
+ */ +typedef enum GpVars_Interconnect_Address_Type +{ + INTERCONNECT_ADDRESS_TYPE_UNICAST = 0, + INTERCONNECT_ADDRESS_TYPE_WILDCARD +} GpVars_Interconnect_Address_Type; + +extern int Gp_interconnect_address_type; + extern char *gp_interconnect_proxy_addresses; typedef enum GpVars_Interconnect_Method diff --git a/src/include/utils/sync_guc_name.h b/src/include/utils/sync_guc_name.h index e0d20bbef65..1b38e4a652d 100644 --- a/src/include/utils/sync_guc_name.h +++ b/src/include/utils/sync_guc_name.h @@ -58,6 +58,7 @@ "gp_interconnect_timer_period", "gp_interconnect_transmit_timeout", "gp_interconnect_type", + "gp_interconnect_address_type", "gp_log_endpoints", "gp_log_interconnect", "gp_log_resgroup_memory", From 3a0fdd13041550b9a829c4814ab7d9783d8d6bc9 Mon Sep 17 00:00:00 2001 From: dh-cloud <60729713+dh-cloud@users.noreply.github.com> Date: Tue, 14 Jun 2022 13:50:46 +0800 Subject: [PATCH 17/46] Fix assertion failure in InitPostgres when resgroup is on (#13643) `ResGroupActivated = true` is set at the end of InitPostgres() by InitResManager(). If, inside InitPostgres(), some code before InitResManager() calls palloc() and fails, the call trace is: gp_failed_to_alloc() -> VmemTracker_GetAvailableVmemMB() -> VmemTracker_GetNonNegativeAvailableVmemChunks -> VmemTracker_GetVmemLimitChunks It will trigger: VmemTracker_GetVmemLimitChunks() { AssertImply(vmemTrackerInited && IsResGroupEnabled(), IsResGroupActivated()); } Like commit c1cdb99d does, remove the AssertImply and add a TODO comment. 
--- src/backend/utils/mmgr/vmem_tracker.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/backend/utils/mmgr/vmem_tracker.c b/src/backend/utils/mmgr/vmem_tracker.c index 20ef2d22c10..710ade8c69e 100644 --- a/src/backend/utils/mmgr/vmem_tracker.c +++ b/src/backend/utils/mmgr/vmem_tracker.c @@ -428,14 +428,12 @@ int32 VmemTracker_GetVmemLimitChunks(void) { /* + * TODO: * For backend who has vmem tracker initialized and resource * group enabled, the vmem limit is not expected to be used * until resource group is activated, otherwise, there might * be an inconsistency about the vmem limit. */ - AssertImply(vmemTrackerInited && IsResGroupEnabled(), - IsResGroupActivated()); - return IsResGroupEnabled() ? ResGroupGetVmemLimitChunks() : vmemChunksQuota; } From 053f28ab980d1dac2d846c516e3303580c1d8c2b Mon Sep 17 00:00:00 2001 From: FairyFar Date: Mon, 27 Jun 2022 09:29:43 +0800 Subject: [PATCH 18/46] removed meaningless code line in resgroup_helper.c (#13731) removed meaningless code line in resgroup_helper.c --- src/backend/utils/resgroup/resgroup_helper.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/backend/utils/resgroup/resgroup_helper.c b/src/backend/utils/resgroup/resgroup_helper.c index 2148ffe3e34..be83506ec33 100644 --- a/src/backend/utils/resgroup/resgroup_helper.c +++ b/src/backend/utils/resgroup/resgroup_helper.c @@ -237,8 +237,6 @@ pg_resgroup_get_status(PG_FUNCTION_ARGS) int ctxsize = sizeof(ResGroupStatCtx) + sizeof(ResGroupStat) * (MaxResourceGroups - 1); - (void) inGroupId; - funcctx->user_fctx = palloc(ctxsize); ctx = (ResGroupStatCtx *) funcctx->user_fctx; From c8436034dadc9ca2c967db5346b642b32830017c Mon Sep 17 00:00:00 2001 From: gpopt <103469259+gpopt@users.noreply.github.com> Date: Tue, 5 Jul 2022 16:34:55 -0700 Subject: [PATCH 19/46] Add an Orca GUC to control fallback for replicated table (#13763) This adds a GUC optimizer_enable_replicated_table, which defaults any DML operation on a replicated table to fall back 
to Postgres planner. optimizer_enable_replicated_table is on by default. Co-authored-by: Daniel Hoffman --- .../gpopt/translate/CTranslatorQueryToDXL.cpp | 10 ++++++++ src/backend/utils/misc/guc_gp.c | 13 ++++++++++- src/include/utils/guc.h | 1 + src/include/utils/unsync_guc_name.h | 1 + src/test/regress/expected/rpt.out | 21 +++++++++++++++++ src/test/regress/expected/rpt_optimizer.out | 23 +++++++++++++++++++ src/test/regress/sql/rpt.sql | 8 +++++++ 7 files changed, 76 insertions(+), 1 deletion(-) diff --git a/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp b/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp index ed472443c98..49179691f89 100644 --- a/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp +++ b/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp @@ -88,6 +88,7 @@ using namespace gpmd; extern bool optimizer_enable_ctas; extern bool optimizer_enable_dml; extern bool optimizer_enable_dml_constraints; +extern bool optimizer_enable_replicated_table; extern bool optimizer_enable_multiple_distinct_aggs; // OIDs of variants of LEAD window function @@ -3306,6 +3307,15 @@ CTranslatorQueryToDXL::NoteDistributionPolicyOpclasses(const RangeTblEntry *rte) return; } + if (!optimizer_enable_replicated_table && + policy->ptype == POLICYTYPE_REPLICATED) + { + GPOS_RAISE( + gpdxl::ExmaMD, gpdxl::ExmiMDObjUnsupported, + GPOS_WSZ_LIT( + "Use optimizer_enable_replicated_table to enable replicated tables")); + } + int policy_nattrs = policy->nattrs; TupleDesc desc = rel->rd_att; bool contains_default_hashops = false; diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index 1e0a883a0bc..c54528ac67d 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -341,7 +341,7 @@ bool optimizer_enable_mergejoin; bool optimizer_prune_unused_columns; bool optimizer_enable_redistribute_nestloop_loj_inner_child; bool optimizer_force_comprehensive_join_implementation; - +bool optimizer_enable_replicated_table; /* 
Optimizer plan enumeration related GUCs */ bool optimizer_enumerate_plans; @@ -3061,6 +3061,17 @@ struct config_bool ConfigureNamesBool_gp[] = false, NULL, NULL, NULL }, + { + {"optimizer_enable_replicated_table", PGC_USERSET, DEVELOPER_OPTIONS, + gettext_noop("Enable replicated tables."), + NULL, + GUC_NOT_IN_SAMPLE + }, + &optimizer_enable_replicated_table, + true, + NULL, NULL, NULL + }, + /* End-of-list marker */ { diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index ad247dd940d..e1477b25845 100644 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -528,6 +528,7 @@ extern bool optimizer_enable_mergejoin; extern bool optimizer_prune_unused_columns; extern bool optimizer_enable_redistribute_nestloop_loj_inner_child; extern bool optimizer_force_comprehensive_join_implementation; +extern bool optimizer_enable_replicated_table; /* Optimizer plan enumeration related GUCs */ extern bool optimizer_enumerate_plans; diff --git a/src/include/utils/unsync_guc_name.h b/src/include/utils/unsync_guc_name.h index 82c23f64403..eca582d1798 100644 --- a/src/include/utils/unsync_guc_name.h +++ b/src/include/utils/unsync_guc_name.h @@ -448,6 +448,7 @@ "optimizer_enable_tablescan", "optimizer_enable_redistribute_nestloop_loj_inner_child", "optimizer_force_comprehensive_join_implementation", + "optimizer_enable_replicated_table", "optimizer_enforce_subplans", "optimizer_enumerate_plans", "optimizer_expand_fulljoin", diff --git a/src/test/regress/expected/rpt.out b/src/test/regress/expected/rpt.out index a3d386c5ac7..47ee448c448 100644 --- a/src/test/regress/expected/rpt.out +++ b/src/test/regress/expected/rpt.out @@ -1224,6 +1224,27 @@ explain (costs off) select * from t1_13532 x, t2_13532 y where y.a < random() an Optimizer: Postgres query optimizer (10 rows) +-- test for optimizer_enable_replicated_table +explain (costs off) select * from rep_tab; + QUERY PLAN +------------------------------------------ + Gather Motion 1:1 (slice1; segments: 1) + -> Seq 
Scan on rep_tab + Optimizer: Postgres query optimizer +(3 rows) + +set optimizer_enable_replicated_table=off; +set optimizer_trace_fallback=on; +explain (costs off) select * from rep_tab; + QUERY PLAN +------------------------------------------ + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on rep_tab + Optimizer: Postgres query optimizer +(3 rows) + +reset optimizer_trace_fallback; +reset optimizer_enable_replicated_table; -- start_ignore drop schema rpt cascade; NOTICE: drop cascades to 7 other objects diff --git a/src/test/regress/expected/rpt_optimizer.out b/src/test/regress/expected/rpt_optimizer.out index 757fcd4d2ea..1783eed147b 100644 --- a/src/test/regress/expected/rpt_optimizer.out +++ b/src/test/regress/expected/rpt_optimizer.out @@ -1208,6 +1208,29 @@ explain (costs off) select * from t1_13532 x, t2_13532 y where y.a < random() an Optimizer: Pivotal Optimizer (GPORCA) (8 rows) +-- test for optimizer_enable_replicated_table +explain (costs off) select * from rep_tab; + QUERY PLAN +------------------------------------------ + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on rep_tab + Optimizer: Pivotal Optimizer (GPORCA) +(3 rows) + +set optimizer_enable_replicated_table=off; +set optimizer_trace_fallback=on; +explain (costs off) select * from rep_tab; +INFO: GPORCA failed to produce a plan, falling back to planner +DETAIL: Feature not supported: Use optimizer_enable_replicated_table to enable replicated tables + QUERY PLAN +------------------------------------------ + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on rep_tab + Optimizer: Postgres query optimizer +(3 rows) + +reset optimizer_trace_fallback; +reset optimizer_enable_replicated_table; -- start_ignore drop schema rpt cascade; NOTICE: drop cascades to 7 other objects diff --git a/src/test/regress/sql/rpt.sql b/src/test/regress/sql/rpt.sql index 42ae58242b8..96f0f28814a 100644 --- a/src/test/regress/sql/rpt.sql +++ b/src/test/regress/sql/rpt.sql @@ -512,6 +512,14 @@ 
explain (costs off) select * from t1_13532 x, t2_13532 y where y.a < random() an set enable_bitmapscan = off; explain (costs off) select * from t1_13532 x, t2_13532 y where y.a < random() and x.b = y.b; +-- test for optimizer_enable_replicated_table +explain (costs off) select * from rep_tab; +set optimizer_enable_replicated_table=off; +set optimizer_trace_fallback=on; +explain (costs off) select * from rep_tab; +reset optimizer_trace_fallback; +reset optimizer_enable_replicated_table; + -- start_ignore drop schema rpt cascade; -- end_ignore From 30f5944cf40b92d0aef74d2beac13147a587d699 Mon Sep 17 00:00:00 2001 From: Yao Wang Date: Mon, 18 Jul 2022 15:58:02 +0800 Subject: [PATCH 20/46] Remove one line about replacement_sort_tuples (#13805) The GUC replacement_sort_tuples was introduced in GP 9.6 to indicate the threshold to use replacement selection rather than quicksort. In PG12, the GUC was removed with all code related to replacement selection sort, and doesn't appear in GPDB7. However, in GPDB7 there is still one line about replacement_sort_tuples in sync_guc_name.h (without any other related code). It should be treated as a mistake. The fix is to simply remove the line and doesn't impact any existing behavior. 
--- src/include/utils/sync_guc_name.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/include/utils/sync_guc_name.h b/src/include/utils/sync_guc_name.h index 1b38e4a652d..ce2c1960a2d 100644 --- a/src/include/utils/sync_guc_name.h +++ b/src/include/utils/sync_guc_name.h @@ -134,7 +134,6 @@ "pljava_release_lingering_savepoints", "pljava_statement_cache_size", "pljava_vmoptions", - "replacement_sort_tuples", "row_security", "search_path", "statement_mem", From 1dbda846dcf8bbe52912256617a896fd7df4683c Mon Sep 17 00:00:00 2001 From: Zhenglong Li Date: Tue, 19 Jul 2022 10:08:30 +0800 Subject: [PATCH 21/46] skip red-zone check if runaway_detector_activation_percent set to 0 or 100 (#13668) Setting runaway_detector_activation_percent to 0 or 100 means disabling runaway detection; this should apply to both the Vmem Tracker and Resource Group. However, in the current implementation, we will still invoke IsGroupInRedZone() when resource group is enabled, even if runaway_detector_activation_percent is set to 0 or 100. The function IsGroupInRedZone() performs atomic operations to read variables, and RedZoneHandler_IsVmemRedZone is a very frequently called function, so this wastes a lot of CPU resources. When we initialize the Red-Zone Handler, we set redZoneChunks to INT32_MAX if runaway detection is disabled, so we can use it to quickly judge whether we are in the Red-Zone or not. No new tests are needed, since the current unit tests already cover this situation. 
--- src/backend/utils/misc/guc_gp.c | 2 +- src/backend/utils/mmgr/redzone_handler.c | 50 ++++++++++++------- .../utils/mmgr/test/redzone_handler_test.c | 25 ++++++---- 3 files changed, 46 insertions(+), 31 deletions(-) diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index c54528ac67d..b8b43fbc892 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -3936,7 +3936,7 @@ struct config_int ConfigureNamesInt_gp[] = { {"runaway_detector_activation_percent", PGC_POSTMASTER, RESOURCES_MEM, - gettext_noop("The runaway detector activates if the used vmem exceeds this percentage of the vmem quota. Set to 100 to disable runaway detection."), + gettext_noop("The runaway detector activates if the used vmem exceeds this percentage of the vmem quota. Set to 0 or 100 to disable runaway detection."), NULL, }, &runaway_detector_activation_percent, diff --git a/src/backend/utils/mmgr/redzone_handler.c b/src/backend/utils/mmgr/redzone_handler.c index 47c5a30b9d6..c920d395695 100644 --- a/src/backend/utils/mmgr/redzone_handler.c +++ b/src/backend/utils/mmgr/redzone_handler.c @@ -45,6 +45,13 @@ int runaway_detector_activation_percent = 80; */ static int32 redZoneChunks = 0; +/* + * When runaway_detector_activation_percent set to 0 or 100, means disable runaway detection, + * and also disable Red-Zone check for resource group. We use the INT32_MAX to indicate that + * the current config is disabled Red-Zone check. + */ +#define DisableRedZoneCheckChunksValue INT32_MAX + /* * A shared memory binary flag (0 or 1) that identifies one process at-a-time as runaway * detector. At red-zone each process tries to determine runaway query, but only the first @@ -93,32 +100,30 @@ RedZoneHandler_ShmemInit() if(!IsUnderPostmaster) { - redZoneChunks = 0; - /* - * runaway_detector_activation_percent = 100% is reserved for not enforcing runaway - * detection by setting the redZoneChunks to an artificially high value. 
Also, during - * gpinitsystem we may start a QD without initializing the gp_vmem_protect_limit. - * This may result in 0 vmem protect limit. In such case, we ensure that the - * redZoneChunks is set to a large value. + * runaway_detector_activation_percent equals to 0 or 100 is reserved for not + * enforcing runaway detection by setting the redZoneChunks to an artificially + * high value, that's DisableRedZoneCheckChunksValue. + * + * Also, during gpinitsystem we may start a QD without initializing the + * gp_vmem_protect_limit. This may result in 0 vmem protect limit. In such case, + * we ensure that the redZoneChunks is set to a large value. + * + * When we enable resource group, we will not use redZoneChunks to determine + * whether the current process is in red-zone or not, so we can calculate the + * redZoneChunks, but it'll never be used. */ - if (runaway_detector_activation_percent != 100) - { + if (runaway_detector_activation_percent != 0 && + runaway_detector_activation_percent != 100 && + gp_vmem_protect_limit != 0) /* * Calculate red zone threshold in MB, and then convert MB to "chunks" * using chunk size for efficient comparison to detect red zone */ redZoneChunks = VmemTracker_ConvertVmemMBToChunks(gp_vmem_protect_limit * (((float) runaway_detector_activation_percent) / 100.0)); - } - - /* - * 0 means disable red-zone completely - * we also disable red-zone for resource group - */ - if (redZoneChunks == 0 || IsResGroupEnabled()) - { - redZoneChunks = INT32_MAX; - } + else + /* 0 or 100 means disable red-zone completely */ + redZoneChunks = DisableRedZoneCheckChunksValue; *isRunawayDetector = 0; } @@ -132,6 +137,13 @@ RedZoneHandler_IsVmemRedZone() { Assert(!vmemTrackerInited || redZoneChunks > 0); + /* + * if runaway_detector_activation_percent be set to 0 or 100, means + * disable runaway detection, just return false. 
+ */ + if (redZoneChunks == DisableRedZoneCheckChunksValue) + return false; + if (vmemTrackerInited) { if (IsResGroupEnabled()) diff --git a/src/backend/utils/mmgr/test/redzone_handler_test.c b/src/backend/utils/mmgr/test/redzone_handler_test.c index 23571ce382b..3073adbcdb9 100755 --- a/src/backend/utils/mmgr/test/redzone_handler_test.c +++ b/src/backend/utils/mmgr/test/redzone_handler_test.c @@ -118,24 +118,24 @@ test__RedZoneHandler_ShmemInit__InitializesGlobalVarsWhenPostmaster(void **state fakeIsRunawayDetector = 1234; isRunawayDetector = NULL; - expect_any_count(ShmemInitStruct, name, 2); - expect_any_count(ShmemInitStruct, size, 2); - expect_any_count(ShmemInitStruct, foundPtr, 2); + expect_any_count(ShmemInitStruct, name, 3); + expect_any_count(ShmemInitStruct, size, 3); + expect_any_count(ShmemInitStruct, foundPtr, 3); will_assign_value(ShmemInitStruct, foundPtr, (bool) false); will_assign_value(ShmemInitStruct, foundPtr, (bool) false); - will_return_count(ShmemInitStruct, &fakeIsRunawayDetector, 2); + will_assign_value(ShmemInitStruct, foundPtr, (bool) false); + will_return_count(ShmemInitStruct, &fakeIsRunawayDetector, 3); + - /* - * When vmem limit is not activated or runaway_detector_activation_percent is - * set to 0,, red zone should be very high (i.e., red-zone will be disabled). - * Note, it doesn't matter what runaway_detector_activation_percent is set for - * this test, as the VmemTracker_ConvertVmemMBToChunks is returning 0. - */ will_return(VmemTracker_ConvertVmemMBToChunks, 0); expect_any(VmemTracker_ConvertVmemMBToChunks, mb); + /* + * When vmem limit is not activated or runaway_detector_activation_percent is + * set to 0, red zone should be very high (i.e., red-zone will be disabled). 
+ */ + runaway_detector_activation_percent = 0; RedZoneHandler_ShmemInit(); - assert_true(isRunawayDetector == &fakeIsRunawayDetector); assert_true(redZoneChunks == INT32_MAX); assert_true(*isRunawayDetector == 0); @@ -149,6 +149,9 @@ test__RedZoneHandler_ShmemInit__InitializesGlobalVarsWhenPostmaster(void **state redZoneChunks = 0; RedZoneHandler_ShmemInit(); assert_true(redZoneChunks == INT32_MAX); + + runaway_detector_activation_percent = 80; + RedZoneHandler_ShmemInit(); } /* From 8b18dabe4c438a0457cfac2d37a922b11f467082 Mon Sep 17 00:00:00 2001 From: Zhenglong Li Date: Wed, 24 Aug 2022 11:22:59 +0800 Subject: [PATCH 22/46] using postgres query optimizer instead of ORCA to run resgroup group pipeline tests (#13974) The resource group pipeline uses ORCA as an optimizer by default. But as a resource management tool, it's unimportant which optimizer we use. So use postgres query optimizer instead of ORCA to run resource group pipeline tests. After that, we can remove the file of resgroup_bypass_optimizer.source and resgroup_bypass_optimizer_1.source. 
--- concourse/scripts/ic_gpdb_resgroup.bash | 2 +- .../input/resgroup/resgroup_bypass.source | 6 - .../output/resgroup/resgroup_bypass.source | 6 - .../resgroup/resgroup_bypass_optimizer.source | 388 ------------------ .../resgroup_bypass_optimizer_1.source | 388 ------------------ 5 files changed, 1 insertion(+), 789 deletions(-) delete mode 100644 src/test/isolation2/output/resgroup/resgroup_bypass_optimizer.source delete mode 100644 src/test/isolation2/output/resgroup/resgroup_bypass_optimizer_1.source diff --git a/concourse/scripts/ic_gpdb_resgroup.bash b/concourse/scripts/ic_gpdb_resgroup.bash index ad6ce9ff3c3..7413224a6a7 100755 --- a/concourse/scripts/ic_gpdb_resgroup.bash +++ b/concourse/scripts/ic_gpdb_resgroup.bash @@ -75,7 +75,7 @@ run_resgroup_test() { scp /home/gpadmin/gpdb_src/src/test/regress/regress.so \ gpadmin@sdw1:/home/gpadmin/gpdb_src/src/test/regress/ - make installcheck-resgroup || ( + make PGOPTIONS="-c optimizer=off" installcheck-resgroup || ( errcode=\$? find src/test/isolation2 -name regression.diffs \ | while read diff; do diff --git a/src/test/isolation2/input/resgroup/resgroup_bypass.source b/src/test/isolation2/input/resgroup/resgroup_bypass.source index 7874bfdbd80..d22b09160cd 100644 --- a/src/test/isolation2/input/resgroup/resgroup_bypass.source +++ b/src/test/isolation2/input/resgroup/resgroup_bypass.source @@ -123,12 +123,6 @@ DROP FUNCTION func_resgroup_bypass_test(int); -- orca will allocate 10M memory error buffer before optimization, and release -- it after that, so if optimizer is set to on, it will fail when the memory -- usage reaches 24M --- --- GPDB_12_MERGE_FIXME: when this case running under orca and without cassert --- it will output different result from the case with cassert. This might be --- because of without cassert, it does use less memory. To make the case green --- to unblock merging into master, add an extra ansfile for this case. 
We should --- consider adding some error interface for these cases post-merge. 61: SET ROLE role_bypass_test; 61: SET gp_resource_group_bypass to on; diff --git a/src/test/isolation2/output/resgroup/resgroup_bypass.source b/src/test/isolation2/output/resgroup/resgroup_bypass.source index c73d4abb3fc..0dfb9b00ab4 100644 --- a/src/test/isolation2/output/resgroup/resgroup_bypass.source +++ b/src/test/isolation2/output/resgroup/resgroup_bypass.source @@ -148,12 +148,6 @@ DROP -- orca will allocate 10M memory error buffer before optimization, and release -- it after that, so if optimizer is set to on, it will fail when the memory -- usage reaches 24M --- --- GPDB_12_MERGE_FIXME: when this case running under orca and without cassert --- it will output different result from the case with cassert. This might be --- because of without cassert, it does use less memory. To make the case green --- to unblock merging into master, add an extra ansfile for this case. We should --- consider adding some error interface for these cases post-merge. 
61: SET ROLE role_bypass_test; SET diff --git a/src/test/isolation2/output/resgroup/resgroup_bypass_optimizer.source b/src/test/isolation2/output/resgroup/resgroup_bypass_optimizer.source deleted file mode 100644 index 95de3412538..00000000000 --- a/src/test/isolation2/output/resgroup/resgroup_bypass_optimizer.source +++ /dev/null @@ -1,388 +0,0 @@ -DROP ROLE IF EXISTS role_bypass_test; -DROP --- start_ignore -DROP RESOURCE GROUP rg_bypass_test; -DROP --- end_ignore - --- --- setup --- - -CREATE RESOURCE GROUP rg_bypass_test WITH (concurrency=2, cpu_rate_limit=20, memory_limit=20, memory_shared_quota=50); -CREATE -CREATE ROLE role_bypass_test RESOURCE GROUP rg_bypass_test; -CREATE - -CREATE OR REPLACE FUNCTION repeatPalloc(int, int) RETURNS int AS '@abs_builddir@/../regress/regress@DLSUFFIX@', 'repeatPalloc' LANGUAGE C READS SQL DATA; -CREATE - -CREATE OR REPLACE FUNCTION hold_memory(int, int) RETURNS int AS $$ SELECT * FROM repeatPalloc(1, $2) $$ LANGUAGE sql; -CREATE - -CREATE OR REPLACE VIEW eat_memory_on_qd_small AS SELECT hold_memory(0,12); -CREATE - -CREATE OR REPLACE VIEW eat_memory_on_qd_large AS SELECT hold_memory(0,100); -CREATE - -CREATE OR REPLACE VIEW eat_memory_on_one_slice AS SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory(t1.dbid,4)=0 ; -CREATE - -CREATE OR REPLACE VIEW eat_memory_on_slices AS SELECT count(null) FROM gp_dist_random('gp_id') t1, gp_dist_random('gp_id') t2 WHERE hold_memory(t1.dbid,4)=0 AND hold_memory(t2.dbid,4)=0 ; -CREATE - -CREATE OR REPLACE FUNCTION round_test(float, integer) RETURNS float AS $$ SELECT round($1 / $2) * $2 $$ LANGUAGE sql; -CREATE - -CREATE OR REPLACE VIEW memory_result AS SELECT rsgname, ismaster, round_test(avg(memory_usage), 1) AS avg_mem FROM( SELECT rsgname, CASE (j->'key')::text WHEN '"-1"'::text THEN 1 ELSE 0 END AS ismaster, ((j->'value')->>'used')::int AS memory_usage FROM( SELECT rsgname, row_to_json(json_each(memory_usage::json)) AS j FROM gp_toolkit.gp_resgroup_status WHERE 
rsgname='rg_bypass_test' )a )b GROUP BY (rsgname, ismaster) ORDER BY rsgname, ismaster; -CREATE - -GRANT ALL ON eat_memory_on_qd_small TO role_bypass_test; -GRANT -GRANT ALL ON eat_memory_on_qd_large TO role_bypass_test; -GRANT -GRANT ALL ON eat_memory_on_one_slice TO role_bypass_test; -GRANT -GRANT ALL ON eat_memory_on_slices TO role_bypass_test; -GRANT -GRANT ALL ON memory_result TO role_bypass_test; -GRANT - --- --- SET command should be bypassed --- - -ALTER RESOURCE GROUP rg_bypass_test SET concurrency 0; -ALTER -61: SET ROLE role_bypass_test; -SET -61&: SELECT 1; -ALTER RESOURCE GROUP rg_bypass_test set concurrency 1; -ALTER -61<: <... completed> - ?column? ----------- - 1 -(1 row) -ALTER RESOURCE GROUP rg_bypass_test set concurrency 0; -ALTER -61: SET enable_hashagg to on; -SET -61: SHOW enable_hashagg; - enable_hashagg ----------------- - on -(1 row) -61: invalid_syntax; -ERROR: syntax error at or near "invalid_syntax" -LINE 1: invalid_syntax; - ^ -61q: ... - --- --- gp_resource_group_bypass --- - -ALTER RESOURCE GROUP rg_bypass_test SET concurrency 0; -ALTER -61: SET ROLE role_bypass_test; -SET -61: SET gp_resource_group_bypass to on; -SET -61: SHOW gp_resource_group_bypass; - gp_resource_group_bypass --------------------------- - on -(1 row) -61: CREATE TABLE table_bypass_test (c1 int); -CREATE -61: INSERT INTO table_bypass_test SELECT generate_series(1,100); -INSERT 100 -61: SELECT count(*) FROM table_bypass_test; - count -------- - 100 -(1 row) -61: DROP TABLE table_bypass_test; -DROP -61: SET gp_resource_group_bypass to off; -SET -61: SHOW gp_resource_group_bypass; - gp_resource_group_bypass --------------------------- - off -(1 row) -61q: ... - --- --- gp_resource_group_bypass is not allowed inside a transaction block --- - -61: BEGIN; -BEGIN -61: SET gp_resource_group_bypass to on; -ERROR: SET gp_resource_group_bypass cannot run inside a transaction block -61: ABORT; -ABORT -61q: ... 
- --- --- gp_resource_group_bypass is not allowed inside a function --- - -DROP FUNCTION IF EXISTS func_resgroup_bypass_test(int); -DROP -CREATE FUNCTION func_resgroup_bypass_test(c1 int) RETURNS INT AS $$ SET gp_resource_group_bypass TO ON; /* inside a function */ SELECT 1 $$ LANGUAGE SQL; -CREATE -SELECT func_resgroup_bypass_test(1); -ERROR: SET gp_resource_group_bypass cannot run inside a transaction block -CONTEXT: SQL function "func_resgroup_bypass_test" statement 1 -DROP FUNCTION func_resgroup_bypass_test(int); -DROP - - --- --- memory limit in bypass mode, on qd --- --- orca will allocate 10M memory error buffer before optimization, and release --- it after that, so if optimizer is set to on, it will fail when the memory --- usage reaches 24M --- --- GPDB_12_MERGE_FIXME: when this case running under orca and without cassert --- it will output different result from the case with cassert. This might be --- because of without cassert, it does use less memory. To make the case green --- to unblock merging into master, add an extra ansfile for this case. We should --- consider adding some error interface for these cases post-merge. - -61: SET ROLE role_bypass_test; -SET -61: SET gp_resource_group_bypass to on; -SET -61: BEGIN; -BEGIN -61: SELECT * FROM eat_memory_on_qd_small; - hold_memory -------------- - 0 -(1 row) -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 0 - rg_bypass_test | 1 | 13 -(2 rows) -61: SELECT * FROM eat_memory_on_qd_large; -ERROR: Out of memory -DETAIL: Resource group memory limit reached -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 0 - rg_bypass_test | 1 | 0 -(2 rows) -61: ABORT; -ABORT -61: BEGIN; -BEGIN -SELECT 1 FROM memory_result where avg_mem > 10 and ismaster = 1; - ?column? ----------- - 1 -(1 row) -61q: ... 
- --- --- memory limit in bypass mode, on one slice --- - -61: SET ROLE role_bypass_test; -SET -61: SET gp_resource_group_bypass to on; -SET -61: BEGIN; -BEGIN -61: SELECT * FROM eat_memory_on_one_slice; - count -------- - 0 -(1 row) -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 4 - rg_bypass_test | 1 | 1 -(2 rows) -61: SELECT * FROM eat_memory_on_one_slice; - count -------- - 0 -(1 row) -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 8 - rg_bypass_test | 1 | 1 -(2 rows) -61: SELECT * FROM eat_memory_on_one_slice; -ERROR: Out of memory (seg0 slice1 127.0.0.1:25432 pid=336) -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory" statement 1 -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 0 - rg_bypass_test | 1 | 0 -(2 rows) -61: ABORT; -ABORT -61: BEGIN; -BEGIN -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 0 - rg_bypass_test | 1 | 1 -(2 rows) -61q: ... 
- --- --- memory limit in bypass mode, on slices --- - -61: SET ROLE role_bypass_test; -SET -61: SET gp_resource_group_bypass to on; -SET -61: BEGIN; -BEGIN -61: SELECT * FROM eat_memory_on_slices; - count -------- - 0 -(1 row) -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 4 - rg_bypass_test | 1 | 1 -(2 rows) -61: SELECT * FROM eat_memory_on_slices; - count -------- - 0 -(1 row) -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 8 - rg_bypass_test | 1 | 1 -(2 rows) -61: SELECT * FROM eat_memory_on_slices; -ERROR: Out of memory (seg0 slice2 127.0.0.1:25432 pid=354) -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory" statement 1 -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 0 - rg_bypass_test | 1 | 0 -(2 rows) -61: ABORT; -ABORT -61: BEGIN; -BEGIN -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 0 - rg_bypass_test | 1 | 1 -(2 rows) -61q: ... - --- --- gp_resgroup_status.num_running is updated in bypass mode --- - -61: SET ROLE role_bypass_test; -SET -61: SET gp_resource_group_bypass to on; -SET -61&: SELECT pg_sleep(10); -62: SET ROLE role_bypass_test; -SET -62: SET gp_resource_group_bypass to on; -SET -62&: SELECT pg_sleep(20); -SELECT num_running FROM gp_toolkit.gp_resgroup_status WHERE rsgname='rg_bypass_test'; - num_running -------------- - 2 -(1 row) -SELECT pg_cancel_backend(pid) FROM pg_stat_activity WHERE rsgname='rg_bypass_test'; - pg_cancel_backend -------------------- - t - t -(2 rows) -61<: <... completed> -ERROR: canceling statement due to user request -62<: <... completed> -ERROR: canceling statement due to user request -61q: ... -62q: ... 
- --- --- pg_stat_activity is updated in bypass mode --- - -61: SET ROLE role_bypass_test; -SET -61: SET gp_resource_group_bypass to on; -SET -61&: SELECT pg_sleep(10); -62: SET ROLE role_bypass_test; -SET -62: SET gp_resource_group_bypass to on; -SET -62&: SELECT pg_sleep(20); -SELECT query FROM pg_stat_activity WHERE rsgname='rg_bypass_test'; - query ----------------------- - SELECT pg_sleep(20); - SELECT pg_sleep(10); -(2 rows) -SELECT pg_cancel_backend(pid) FROM pg_stat_activity WHERE rsgname='rg_bypass_test'; - pg_cancel_backend -------------------- - t - t -(2 rows) -61<: <... completed> -ERROR: canceling statement due to user request -62<: <... completed> -ERROR: canceling statement due to user request -61q: ... -62q: ... - --- --- cleanup --- - -REVOKE ALL ON eat_memory_on_qd_small FROM role_bypass_test; -REVOKE -REVOKE ALL ON eat_memory_on_qd_large FROM role_bypass_test; -REVOKE -REVOKE ALL ON eat_memory_on_one_slice FROM role_bypass_test; -REVOKE -REVOKE ALL ON eat_memory_on_slices FROM role_bypass_test; -REVOKE -REVOKE ALL ON memory_result FROM role_bypass_test; -REVOKE - -DROP ROLE role_bypass_test; -DROP -DROP RESOURCE GROUP rg_bypass_test; -DROP - --- vi:filetype=sql: diff --git a/src/test/isolation2/output/resgroup/resgroup_bypass_optimizer_1.source b/src/test/isolation2/output/resgroup/resgroup_bypass_optimizer_1.source deleted file mode 100644 index 48d00a54e9b..00000000000 --- a/src/test/isolation2/output/resgroup/resgroup_bypass_optimizer_1.source +++ /dev/null @@ -1,388 +0,0 @@ -DROP ROLE IF EXISTS role_bypass_test; -DROP --- start_ignore -DROP RESOURCE GROUP rg_bypass_test; -DROP --- end_ignore - --- --- setup --- - -CREATE RESOURCE GROUP rg_bypass_test WITH (concurrency=2, cpu_rate_limit=20, memory_limit=20, memory_shared_quota=50); -CREATE -CREATE ROLE role_bypass_test RESOURCE GROUP rg_bypass_test; -CREATE - -CREATE OR REPLACE FUNCTION repeatPalloc(int, int) RETURNS int AS '@abs_builddir@/../regress/regress@DLSUFFIX@', 'repeatPalloc' 
LANGUAGE C READS SQL DATA; -CREATE - -CREATE OR REPLACE FUNCTION hold_memory(int, int) RETURNS int AS $$ SELECT * FROM repeatPalloc(1, $2) $$ LANGUAGE sql; -CREATE - -CREATE OR REPLACE VIEW eat_memory_on_qd_small AS SELECT hold_memory(0,12); -CREATE - -CREATE OR REPLACE VIEW eat_memory_on_qd_large AS SELECT hold_memory(0,100); -CREATE - -CREATE OR REPLACE VIEW eat_memory_on_one_slice AS SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory(t1.dbid,4)=0 ; -CREATE - -CREATE OR REPLACE VIEW eat_memory_on_slices AS SELECT count(null) FROM gp_dist_random('gp_id') t1, gp_dist_random('gp_id') t2 WHERE hold_memory(t1.dbid,4)=0 AND hold_memory(t2.dbid,4)=0 ; -CREATE - -CREATE OR REPLACE FUNCTION round_test(float, integer) RETURNS float AS $$ SELECT round($1 / $2) * $2 $$ LANGUAGE sql; -CREATE - -CREATE OR REPLACE VIEW memory_result AS SELECT rsgname, ismaster, round_test(avg(memory_usage), 1) AS avg_mem FROM( SELECT rsgname, CASE (j->'key')::text WHEN '"-1"'::text THEN 1 ELSE 0 END AS ismaster, ((j->'value')->>'used')::int AS memory_usage FROM( SELECT rsgname, row_to_json(json_each(memory_usage::json)) AS j FROM gp_toolkit.gp_resgroup_status WHERE rsgname='rg_bypass_test' )a )b GROUP BY (rsgname, ismaster) ORDER BY rsgname, ismaster; -CREATE - -GRANT ALL ON eat_memory_on_qd_small TO role_bypass_test; -GRANT -GRANT ALL ON eat_memory_on_qd_large TO role_bypass_test; -GRANT -GRANT ALL ON eat_memory_on_one_slice TO role_bypass_test; -GRANT -GRANT ALL ON eat_memory_on_slices TO role_bypass_test; -GRANT -GRANT ALL ON memory_result TO role_bypass_test; -GRANT - --- --- SET command should be bypassed --- - -ALTER RESOURCE GROUP rg_bypass_test SET concurrency 0; -ALTER -61: SET ROLE role_bypass_test; -SET -61&: SELECT 1; -ALTER RESOURCE GROUP rg_bypass_test set concurrency 1; -ALTER -61<: <... completed> - ?column? 
----------- - 1 -(1 row) -ALTER RESOURCE GROUP rg_bypass_test set concurrency 0; -ALTER -61: SET enable_hashagg to on; -SET -61: SHOW enable_hashagg; - enable_hashagg ----------------- - on -(1 row) -61: invalid_syntax; -ERROR: syntax error at or near "invalid_syntax" -LINE 1: invalid_syntax; - ^ -61q: ... - --- --- gp_resource_group_bypass --- - -ALTER RESOURCE GROUP rg_bypass_test SET concurrency 0; -ALTER -61: SET ROLE role_bypass_test; -SET -61: SET gp_resource_group_bypass to on; -SET -61: SHOW gp_resource_group_bypass; - gp_resource_group_bypass --------------------------- - on -(1 row) -61: CREATE TABLE table_bypass_test (c1 int); -CREATE -61: INSERT INTO table_bypass_test SELECT generate_series(1,100); -INSERT 100 -61: SELECT count(*) FROM table_bypass_test; - count -------- - 100 -(1 row) -61: DROP TABLE table_bypass_test; -DROP -61: SET gp_resource_group_bypass to off; -SET -61: SHOW gp_resource_group_bypass; - gp_resource_group_bypass --------------------------- - off -(1 row) -61q: ... - --- --- gp_resource_group_bypass is not allowed inside a transaction block --- - -61: BEGIN; -BEGIN -61: SET gp_resource_group_bypass to on; -ERROR: SET gp_resource_group_bypass cannot run inside a transaction block -61: ABORT; -ABORT -61q: ... 
- --- --- gp_resource_group_bypass is not allowed inside a function --- - -DROP FUNCTION IF EXISTS func_resgroup_bypass_test(int); -DROP -CREATE FUNCTION func_resgroup_bypass_test(c1 int) RETURNS INT AS $$ SET gp_resource_group_bypass TO ON; /* inside a function */ SELECT 1 $$ LANGUAGE SQL; -CREATE -SELECT func_resgroup_bypass_test(1); -ERROR: SET gp_resource_group_bypass cannot run inside a transaction block -CONTEXT: SQL function "func_resgroup_bypass_test" statement 1 -DROP FUNCTION func_resgroup_bypass_test(int); -DROP - - --- --- memory limit in bypass mode, on qd --- --- orca will allocate 10M memory error buffer before optimization, and release --- it after that, so if optimizer is set to on, it will fail when the memory --- usage reaches 24M --- --- GPDB_12_MERGE_FIXME: when this case running under orca and without cassert --- it will output different result from the case with cassert. This might be --- because of without cassert, it does use less memory. To make the case green --- to unblock merging into master, add an extra ansfile for this case. We should --- consider adding some error interface for these cases post-merge. - -61: SET ROLE role_bypass_test; -SET -61: SET gp_resource_group_bypass to on; -SET -61: BEGIN; -BEGIN -61: SELECT * FROM eat_memory_on_qd_small; - hold_memory -------------- - 0 -(1 row) -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 0 - rg_bypass_test | 1 | 12 -(2 rows) -61: SELECT * FROM eat_memory_on_qd_large; -ERROR: Out of memory -DETAIL: Resource group memory limit reached -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 0 - rg_bypass_test | 1 | 0 -(2 rows) -61: ABORT; -ABORT -61: BEGIN; -BEGIN -SELECT 1 FROM memory_result where avg_mem > 10 and ismaster = 1; - ?column? ----------- - 1 -(1 row) -61q: ... 
- --- --- memory limit in bypass mode, on one slice --- - -61: SET ROLE role_bypass_test; -SET -61: SET gp_resource_group_bypass to on; -SET -61: BEGIN; -BEGIN -61: SELECT * FROM eat_memory_on_one_slice; - count -------- - 0 -(1 row) -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 4 - rg_bypass_test | 1 | 0 -(2 rows) -61: SELECT * FROM eat_memory_on_one_slice; - count -------- - 0 -(1 row) -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 8 - rg_bypass_test | 1 | 0 -(2 rows) -61: SELECT * FROM eat_memory_on_one_slice; -ERROR: Out of memory (seg0 slice1 127.0.0.1:25432 pid=336) -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory" statement 1 -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 0 - rg_bypass_test | 1 | 0 -(2 rows) -61: ABORT; -ABORT -61: BEGIN; -BEGIN -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 0 - rg_bypass_test | 1 | 0 -(2 rows) -61q: ... 
- --- --- memory limit in bypass mode, on slices --- - -61: SET ROLE role_bypass_test; -SET -61: SET gp_resource_group_bypass to on; -SET -61: BEGIN; -BEGIN -61: SELECT * FROM eat_memory_on_slices; - count -------- - 0 -(1 row) -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 4 - rg_bypass_test | 1 | 0 -(2 rows) -61: SELECT * FROM eat_memory_on_slices; - count -------- - 0 -(1 row) -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 8 - rg_bypass_test | 1 | 0 -(2 rows) -61: SELECT * FROM eat_memory_on_slices; -ERROR: Out of memory (seg0 slice2 127.0.0.1:25432 pid=354) -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory" statement 1 -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 0 - rg_bypass_test | 1 | 0 -(2 rows) -61: ABORT; -ABORT -61: BEGIN; -BEGIN -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 0 - rg_bypass_test | 1 | 0 -(2 rows) -61q: ... - --- --- gp_resgroup_status.num_running is updated in bypass mode --- - -61: SET ROLE role_bypass_test; -SET -61: SET gp_resource_group_bypass to on; -SET -61&: SELECT pg_sleep(10); -62: SET ROLE role_bypass_test; -SET -62: SET gp_resource_group_bypass to on; -SET -62&: SELECT pg_sleep(20); -SELECT num_running FROM gp_toolkit.gp_resgroup_status WHERE rsgname='rg_bypass_test'; - num_running -------------- - 2 -(1 row) -SELECT pg_cancel_backend(pid) FROM pg_stat_activity WHERE rsgname='rg_bypass_test'; - pg_cancel_backend -------------------- - t - t -(2 rows) -61<: <... completed> -ERROR: canceling statement due to user request -62<: <... completed> -ERROR: canceling statement due to user request -61q: ... -62q: ... 
- --- --- pg_stat_activity is updated in bypass mode --- - -61: SET ROLE role_bypass_test; -SET -61: SET gp_resource_group_bypass to on; -SET -61&: SELECT pg_sleep(10); -62: SET ROLE role_bypass_test; -SET -62: SET gp_resource_group_bypass to on; -SET -62&: SELECT pg_sleep(20); -SELECT query FROM pg_stat_activity WHERE rsgname='rg_bypass_test'; - query ----------------------- - SELECT pg_sleep(20); - SELECT pg_sleep(10); -(2 rows) -SELECT pg_cancel_backend(pid) FROM pg_stat_activity WHERE rsgname='rg_bypass_test'; - pg_cancel_backend -------------------- - t - t -(2 rows) -61<: <... completed> -ERROR: canceling statement due to user request -62<: <... completed> -ERROR: canceling statement due to user request -61q: ... -62q: ... - --- --- cleanup --- - -REVOKE ALL ON eat_memory_on_qd_small FROM role_bypass_test; -REVOKE -REVOKE ALL ON eat_memory_on_qd_large FROM role_bypass_test; -REVOKE -REVOKE ALL ON eat_memory_on_one_slice FROM role_bypass_test; -REVOKE -REVOKE ALL ON eat_memory_on_slices FROM role_bypass_test; -REVOKE -REVOKE ALL ON memory_result FROM role_bypass_test; -REVOKE - -DROP ROLE role_bypass_test; -DROP -DROP RESOURCE GROUP rg_bypass_test; -DROP - --- vi:filetype=sql: From 9f4d8623179fea8fb5ba21077b09dec3f7b98055 Mon Sep 17 00:00:00 2001 From: zjpedu Date: Mon, 29 Aug 2022 11:33:42 +0800 Subject: [PATCH 23/46] [7X] Feat: Find the pids of overflowed subtransaction (#13992) [7X] Feat: Identify backends with suboverflowed txs Subtransaction overflow is a chronic problem for Postgres and Greenplum, which arises when a backend creates more than PGPROC_MAX_CACHED_SUBXIDS (64) subtransactions. This is often caused by the use of plpgsql EXCEPTION blocks, SAVEPOINT etc. Overflow implies that pg_subtrans needs to be consulted and the in-memory XidCache is no longer sufficient. The lookup cost is particularly felt when there are long running transactions in the system, in addition to backends with suboverflow. 
Long running transactions increase the xmin boundary, leading to more lookups, especially older pages in pg_subtrans. Looking up older pages while we are constantly generating new pg_subtrans pages (with the suboverflowed backend(s)) leads to pg_subtrans LRU misses, exacerbating the slowdown in overall system query performance. Terminating the backend with suboverflow or backends with long running transactions can help alleviate the potential performance problems. This commit provides an extension and a view which can help DBAs identify suboverflown backends, which they can subsequently terminate. Please note that backends should be terminated from the master (which will automatically terminate the corresponding backends on the segments). --- src/backend/catalog/system_views.sql | 8 +- src/backend/cdb/cdbutil.c | 58 ++++++- src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_proc.dat | 2 + src/test/regress/expected/subtrx_overflow.out | 141 ++++++++++++++++++ src/test/regress/greenplum_schedule | 3 + src/test/regress/sql/subtrx_overflow.sql | 96 ++++++++++++ 7 files changed, 307 insertions(+), 3 deletions(-) create mode 100644 src/test/regress/expected/subtrx_overflow.out create mode 100644 src/test/regress/sql/subtrx_overflow.sql diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 48e6068c0f0..37c72f6101f 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1756,4 +1756,10 @@ REVOKE ALL ON pg_replication_origin_status FROM public; REVOKE ALL ON pg_subscription FROM public; GRANT SELECT (oid, subdbid, subname, subowner, subenabled, subbinary, substream, subslotname, subsynccommit, subpublications) - ON pg_subscription TO public; \ No newline at end of file + ON pg_subscription TO public; + +-- Dispatch and Aggregate the backends information of subtransactions overflowed +CREATE VIEW gp_suboverflowed_backend(segid, pids) AS + SELECT -1, gp_get_suboverflowed_backends() 
+UNION ALL + SELECT gp_segment_id, gp_get_suboverflowed_backends() FROM gp_dist_random('gp_id') order by 1; diff --git a/src/backend/cdb/cdbutil.c b/src/backend/cdb/cdbutil.c index 938b13b03c8..152e04be6bb 100644 --- a/src/backend/cdb/cdbutil.c +++ b/src/backend/cdb/cdbutil.c @@ -99,6 +99,8 @@ static HTAB *hostPrimaryCountHashTableInit(void); static int nextQEIdentifer(CdbComponentDatabases *cdbs); +Datum gp_get_suboverflowed_backends(PG_FUNCTION_ARGS); + static HTAB *segment_ip_cache_htab = NULL; int numsegmentsFromQD = -1; @@ -1950,6 +1952,33 @@ AvoidCorefileGeneration() #endif } +PG_FUNCTION_INFO_V1(gp_get_suboverflowed_backends); +/* + * Find the backends where subtransaction overflowed. + */ +Datum +gp_get_suboverflowed_backends(PG_FUNCTION_ARGS) +{ + int i; + ArrayBuildState *astate = NULL; + + LWLockAcquire(ProcArrayLock, LW_SHARED); + for (i = 0; i < ProcGlobal->allProcCount; i++) + { + if (ProcGlobal->allPgXact[i].overflowed) + astate = accumArrayResult(astate, + Int32GetDatum(ProcGlobal->allProcs[i].pid), + false, INT4OID, CurrentMemoryContext); + } + LWLockRelease(ProcArrayLock); + + if (astate) + PG_RETURN_DATUM(makeArrayResult(astate, + CurrentMemoryContext)); + else + PG_RETURN_NULL(); +} + #else bool am_ftshandler = false; @@ -4139,4 +4168,31 @@ AvoidCorefileGeneration() #endif } -#endif /* USE_INTERNAL_FTS */ \ No newline at end of file +PG_FUNCTION_INFO_V1(gp_get_suboverflowed_backends); +/* + * Find the backends where subtransaction overflowed. 
+ */ +Datum +gp_get_suboverflowed_backends(PG_FUNCTION_ARGS) +{ + int i; + ArrayBuildState *astate = NULL; + + LWLockAcquire(ProcArrayLock, LW_SHARED); + for (i = 0; i < ProcGlobal->allProcCount; i++) + { + if (ProcGlobal->allPgXact[i].overflowed) + astate = accumArrayResult(astate, + Int32GetDatum(ProcGlobal->allProcs[i].pid), + false, INT4OID, CurrentMemoryContext); + } + LWLockRelease(ProcArrayLock); + + if (astate) + PG_RETURN_DATUM(makeArrayResult(astate, + CurrentMemoryContext)); + else + PG_RETURN_NULL(); +} + +#endif /* USE_INTERNAL_FTS */ diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index e0b273a178f..ca6c907dbe2 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -56,6 +56,6 @@ */ /* 3yyymmddN */ -#define CATALOG_VERSION_NO 302204081 +#define CATALOG_VERSION_NO 302208261 #endif diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 37f35ca8018..cd4616b8fbb 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -11821,6 +11821,8 @@ proargmodes => '{o,o,o,o,o,o,o,o,o,o}', proargnames => '{segid,waiter_dxid,holder_dxid,holdTillEndXact,waiter_lpid,holder_lpid,waiter_lockmode,waiter_locktype,waiter_sessionid,holder_sessionid}', prosrc => 'gp_dist_wait_status' }, +{ oid => 6464, descr => 'get backends of overflowed subtransaction', + proname => 'gp_get_suboverflowed_backends', provolatile => 'v', prorettype => '_int4', proargtypes => '', prosrc => 'gp_get_suboverflowed_backends' }, { oid => 6040, descr => 'get gp all segments pg_snapshot', proname => 'gp_current_snapshot', proisstrict => 'f', diff --git a/src/test/regress/expected/subtrx_overflow.out b/src/test/regress/expected/subtrx_overflow.out new file mode 100644 index 00000000000..922a6117e2f --- /dev/null +++ b/src/test/regress/expected/subtrx_overflow.out @@ -0,0 +1,141 @@ +-- It will occur subtransaction overflow when insert data to segments 1000 times. 
+-- All segments occur overflow. +DROP TABLE IF EXISTS t_1352_1; +NOTICE: table "t_1352_1" does not exist, skipping +CREATE TABLE t_1352_1(c1 int) DISTRIBUTED BY (c1); +CREATE OR REPLACE FUNCTION transaction_test0() +RETURNS void AS $$ +DECLARE + i int; +BEGIN + FOR i in 0..1000 + LOOP + BEGIN + INSERT INTO t_1352_1 VALUES(i); + EXCEPTION + WHEN UNIQUE_VIOLATION THEN + NULL; + END; + END LOOP; +END; +$$ +LANGUAGE plpgsql; +-- It will occur subtransaction overflow when insert data to segments 1000 times. +-- All segments occur overflow. +DROP TABLE IF EXISTS t_1352_2; +NOTICE: table "t_1352_2" does not exist, skipping +CREATE TABLE t_1352_2(c int PRIMARY KEY); +CREATE OR REPLACE FUNCTION transaction_test1() +RETURNS void AS $$ +DECLARE i int; +BEGIN + for i in 0..1000 + LOOP + BEGIN + INSERT INTO t_1352_2 VALUES(i); + EXCEPTION + WHEN UNIQUE_VIOLATION THEN + NULL; + END; + END LOOP; +END; +$$ +LANGUAGE plpgsql; +-- It occur subtransaction overflow for coordinator and all segments. +CREATE OR REPLACE FUNCTION transaction_test2() +RETURNS void AS $$ +DECLARE + i int; +BEGIN + for i in 0..1000 + LOOP + BEGIN + CREATE TEMP TABLE tmptab(c int) DISTRIBUTED BY (c); + DROP TABLE tmptab; + EXCEPTION + WHEN others THEN + NULL; + END; + END LOOP; +END; +$$ +LANGUAGE plpgsql; +BEGIN; +SELECT transaction_test0(); + transaction_test0 +------------------- + +(1 row) + +SELECT segid, count(*) AS num_suboverflowed FROM gp_suboverflowed_backend +WHERE array_length(pids, 1) > 0 +GROUP BY segid +ORDER BY segid; + segid | num_suboverflowed +-------+------------------- + 0 | 1 + 1 | 1 + 2 | 1 +(3 rows) + +COMMIT; +BEGIN; +SELECT transaction_test1(); + transaction_test1 +------------------- + +(1 row) + +SELECT segid, count(*) AS num_suboverflowed FROM gp_suboverflowed_backend +WHERE array_length(pids, 1) > 0 +GROUP BY segid +ORDER BY segid; + segid | num_suboverflowed +-------+------------------- + 0 | 1 + 1 | 1 + 2 | 1 +(3 rows) + +COMMIT; +BEGIN; +SELECT transaction_test2(); + 
transaction_test2 +------------------- + +(1 row) + +SELECT segid, count(*) AS num_suboverflowed FROM gp_suboverflowed_backend +WHERE array_length(pids, 1) > 0 +GROUP BY segid +ORDER BY segid; + segid | num_suboverflowed +-------+------------------- + -1 | 1 + 0 | 1 + 1 | 1 + 2 | 1 +(4 rows) + +COMMIT; +BEGIN; +SELECT transaction_test0(); + transaction_test0 +------------------- + +(1 row) + +SELECT segid, count(*) AS num_suboverflowed FROM + (SELECT segid, unnest(pids) + FROM gp_suboverflowed_backend + WHERE array_length(pids, 1) > 0) AS tmp +GROUP BY segid +ORDER BY segid; + segid | num_suboverflowed +-------+------------------- + 0 | 1 + 1 | 1 + 2 | 1 +(3 rows) + +COMMIT; diff --git a/src/test/regress/greenplum_schedule b/src/test/regress/greenplum_schedule index 20f5c34500b..58a0ff6fcc2 100755 --- a/src/test/regress/greenplum_schedule +++ b/src/test/regress/greenplum_schedule @@ -324,4 +324,7 @@ test: directory_table # test if motion sockets are created with the gp_segment_configuration.address test: motion_socket +# subtransaction overflow test +test: subtrx_overflow + # end of tests diff --git a/src/test/regress/sql/subtrx_overflow.sql b/src/test/regress/sql/subtrx_overflow.sql new file mode 100644 index 00000000000..397a2e01874 --- /dev/null +++ b/src/test/regress/sql/subtrx_overflow.sql @@ -0,0 +1,96 @@ +-- It will occur subtransaction overflow when insert data to segments 1000 times. +-- All segments occur overflow. +DROP TABLE IF EXISTS t_1352_1; +CREATE TABLE t_1352_1(c1 int) DISTRIBUTED BY (c1); +CREATE OR REPLACE FUNCTION transaction_test0() +RETURNS void AS $$ +DECLARE + i int; +BEGIN + FOR i in 0..1000 + LOOP + BEGIN + INSERT INTO t_1352_1 VALUES(i); + EXCEPTION + WHEN UNIQUE_VIOLATION THEN + NULL; + END; + END LOOP; +END; +$$ +LANGUAGE plpgsql; + +-- It will occur subtransaction overflow when insert data to segments 1000 times. +-- All segments occur overflow. 
+DROP TABLE IF EXISTS t_1352_2; +CREATE TABLE t_1352_2(c int PRIMARY KEY); +CREATE OR REPLACE FUNCTION transaction_test1() +RETURNS void AS $$ +DECLARE i int; +BEGIN + for i in 0..1000 + LOOP + BEGIN + INSERT INTO t_1352_2 VALUES(i); + EXCEPTION + WHEN UNIQUE_VIOLATION THEN + NULL; + END; + END LOOP; +END; +$$ +LANGUAGE plpgsql; + +-- It occur subtransaction overflow for coordinator and all segments. +CREATE OR REPLACE FUNCTION transaction_test2() +RETURNS void AS $$ +DECLARE + i int; +BEGIN + for i in 0..1000 + LOOP + BEGIN + CREATE TEMP TABLE tmptab(c int) DISTRIBUTED BY (c); + DROP TABLE tmptab; + EXCEPTION + WHEN others THEN + NULL; + END; + END LOOP; +END; +$$ +LANGUAGE plpgsql; + +BEGIN; +SELECT transaction_test0(); +SELECT segid, count(*) AS num_suboverflowed FROM gp_suboverflowed_backend +WHERE array_length(pids, 1) > 0 +GROUP BY segid +ORDER BY segid; +COMMIT; + +BEGIN; +SELECT transaction_test1(); +SELECT segid, count(*) AS num_suboverflowed FROM gp_suboverflowed_backend +WHERE array_length(pids, 1) > 0 +GROUP BY segid +ORDER BY segid; +COMMIT; + +BEGIN; +SELECT transaction_test2(); +SELECT segid, count(*) AS num_suboverflowed FROM gp_suboverflowed_backend +WHERE array_length(pids, 1) > 0 +GROUP BY segid +ORDER BY segid; +COMMIT; + +BEGIN; +SELECT transaction_test0(); +SELECT segid, count(*) AS num_suboverflowed FROM + (SELECT segid, unnest(pids) + FROM gp_suboverflowed_backend + WHERE array_length(pids, 1) > 0) AS tmp +GROUP BY segid +ORDER BY segid; +COMMIT; From a8492e5017b7aec87beaab51c51d8839df590b3c Mon Sep 17 00:00:00 2001 From: zjpedu Date: Wed, 31 Aug 2022 15:02:16 +0800 Subject: [PATCH 24/46] Add a GUC to control the output of suboverflow transaction sql statement (#14019) We might want to also consider adding a log message to print the query string that caused the overflow. 
This is important as only 1 statement out of thousands executed in a backend may trigger the overflow, or the backend can come out of the overflow state before it is inspected with our view/UDF. Logging the statement will ensure that customers can pinpoint the offending statements. --- src/backend/access/transam/varsup.c | 6 +++++- src/backend/utils/misc/guc_gp.c | 12 ++++++++++++ src/include/utils/guc.h | 2 +- src/include/utils/sync_guc_name.h | 1 + 4 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c index 5ca18d1b77d..71eb5e83707 100644 --- a/src/backend/access/transam/varsup.c +++ b/src/backend/access/transam/varsup.c @@ -32,7 +32,7 @@ #include "access/distributedlog.h" #include "cdb/cdbvars.h" - +#include "tcop/tcopprot.h" /* Number of OIDs to prefetch (preallocate) per XLOG write */ #define VAR_OID_PREFETCH 8192 @@ -298,7 +298,11 @@ GetNewTransactionId(bool isSubXact) MyProc->subxidStatus.count = substat->count = nxids + 1; } else + { MyProc->subxidStatus.overflowed = substat->overflowed = true; + ereportif (gp_log_suboverflow_statement, LOG, + (errmsg("Statement caused suboverflow: %s", debug_query_string))); + } } LWLockRelease(XidGenLock); diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index b8b43fbc892..99f76487f83 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -162,6 +162,8 @@ bool gp_create_table_random_default_distribution = true; bool gp_allow_non_uniform_partitioning_ddl = true; int dtx_phase2_retry_second = 0; +bool gp_log_suboverflow_statement = false; + bool log_dispatch_stats = false; int explain_memory_verbosity = 0; @@ -3073,6 +3075,16 @@ struct config_bool ConfigureNamesBool_gp[] = }, + { + {"gp_log_suboverflow_statement", PGC_SUSET, LOGGING_WHAT, + gettext_noop("Enable logging of statements that cause subtransaction overflow."), + NULL, + }, + &gp_log_suboverflow_statement, + false, + NULL, 
NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index e1477b25845..237d5ec844f 100644 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -327,7 +327,7 @@ extern bool Debug_resource_group; extern bool gp_create_table_random_default_distribution; extern bool gp_allow_non_uniform_partitioning_ddl; extern int dtx_phase2_retry_second; - +extern bool gp_log_suboverflow_statement; /* WAL replication debug gucs */ extern bool debug_walrepl_snd; extern bool debug_walrepl_syncrep; diff --git a/src/include/utils/sync_guc_name.h b/src/include/utils/sync_guc_name.h index ce2c1960a2d..2c7604a559e 100644 --- a/src/include/utils/sync_guc_name.h +++ b/src/include/utils/sync_guc_name.h @@ -64,6 +64,7 @@ "gp_log_resgroup_memory", "gp_log_resqueue_memory", "gp_log_stack_trace_lines", + "gp_log_suboverflow_statement", "gp_max_packet_size", "gp_max_slices", "gp_motion_slice_noop", From d27898dc689a36c3d2b4c877f8b8b59dbdf4b683 Mon Sep 17 00:00:00 2001 From: dreamedcheng Date: Tue, 13 Sep 2022 10:01:22 +0800 Subject: [PATCH 25/46] Put some locale related GUCs to sync guc array (#14068) Note that lc_monetary and lc_time are related to formatting output data. Besides, formatting functions will be pushed down to QEs in some common cases. So to keep the output data consistent with the locale value set in QD, we need to sync these GUCs between QD and QEs. 
Co-authored-by: wuchengwen --- src/include/utils/sync_guc_name.h | 2 + src/include/utils/unsync_guc_name.h | 2 - src/test/regress/expected/gp_sync_lc_gucs.out | 86 +++++++++++++++++++ src/test/regress/greenplum_schedule | 2 +- src/test/regress/sql/gp_sync_lc_gucs.sql | 47 ++++++++++ 5 files changed, 136 insertions(+), 3 deletions(-) create mode 100644 src/test/regress/expected/gp_sync_lc_gucs.out create mode 100644 src/test/regress/sql/gp_sync_lc_gucs.sql diff --git a/src/include/utils/sync_guc_name.h b/src/include/utils/sync_guc_name.h index 2c7604a559e..1eb15e7678c 100644 --- a/src/include/utils/sync_guc_name.h +++ b/src/include/utils/sync_guc_name.h @@ -99,7 +99,9 @@ "jit_optimize_above_cost", "jit_profiling_support", "jit_tuple_deforming", + "lc_monetary", "lc_numeric", + "lc_time", "log_btree_build_stats", "log_dispatch_stats", "log_duration", diff --git a/src/include/utils/unsync_guc_name.h b/src/include/utils/unsync_guc_name.h index eca582d1798..845ba9290e8 100644 --- a/src/include/utils/unsync_guc_name.h +++ b/src/include/utils/unsync_guc_name.h @@ -326,8 +326,6 @@ "lc_collate", "lc_ctype", "lc_messages", - "lc_monetary", - "lc_time", "listen_addresses", "local_preload_libraries", "lock_timeout", diff --git a/src/test/regress/expected/gp_sync_lc_gucs.out b/src/test/regress/expected/gp_sync_lc_gucs.out new file mode 100644 index 00000000000..11010f45148 --- /dev/null +++ b/src/test/regress/expected/gp_sync_lc_gucs.out @@ -0,0 +1,86 @@ +-- This case test that if lc related GUCs are synchronized +-- between QD and QEs. 
+CREATE TABLE test_lc(c1 int8, c2 date) DISTRIBUTED BY (c1); +CREATE OR REPLACE FUNCTION public.segment_setting(guc text) + RETURNS SETOF text EXECUTE ON ALL SEGMENTS AS $$ + BEGIN RETURN NEXT pg_catalog.current_setting(guc); END + $$ LANGUAGE plpgsql; +INSERT INTO test_lc values ('4567890123456789', '2022-08-01'); +INSERT INTO test_lc values ('-4567890123456789', '2022-09-01'); +-- Test if lc_monetary is synced +SHOW lc_monetary; + lc_monetary +------------- + C +(1 row) + +SELECT to_char(c1, 'L9999999999999999.000') FROM test_lc; + to_char +------------------------ + 4567890123456789.000 + -4567890123456789.000 +(2 rows) + +SET lc_monetary = 'en_US.utf8'; +SELECT to_char(c1, 'L9999999999999999.000') FROM test_lc; + to_char +------------------------ + $ 4567890123456789.000 + $-4567890123456789.000 +(2 rows) + +-- If the QE processes are exited for whatever the reason, +-- QD should sync the lc_monetary to the newly created QEs. +SELECT pg_terminate_backend(pid) FROM gp_dist_random('pg_stat_activity') WHERE sess_id + in (SELECT sess_id from pg_stat_activity WHERE pid in (SELECT pg_backend_pid())) ; +ERROR: terminating connection due to administrator command (seg0 slice1 11.158.187.228:7002 pid=114366) +-- Should output the results given lc_monetary = 'en_US.utf8' +SELECT to_char(c1, 'L9999999999999999.000') FROM test_lc; + to_char +------------------------ + $ 4567890123456789.000 + $-4567890123456789.000 +(2 rows) + +RESET lc_monetary; +-- Test if lc_time is synced +SHOW lc_time; + lc_time +--------- + C +(1 row) + +SELECT to_char(c2, 'DD TMMON YYYY') FROM test_lc; + to_char +------------- + 01 AUG 2022 + 01 SEP 2022 +(2 rows) + +SET lc_time = 'en_US.utf8'; +-- Since 'C' and 'en_US.utf8' time formatting will output the same result, and in +-- some environments, we don't know which kind of locale it supports. So we just +-- use segment_setting to checking the setting of lc_time on QEs. 
+SELECT segment_setting('lc_time'); + segment_setting +----------------- + en_US.utf8 + en_US.utf8 + en_US.utf8 +(3 rows) + +-- If the QE processes are exited for whatever the reason, +-- QD should sync the lc_time to the newly created QEs. +SELECT pg_terminate_backend(pid) FROM gp_dist_random('pg_stat_activity') WHERE sess_id + in (SELECT sess_id from pg_stat_activity WHERE pid in (SELECT pg_backend_pid())) ; +ERROR: terminating connection due to administrator command (seg1 slice1 11.158.187.228:7003 pid=114379) +SELECT segment_setting('lc_time'); + segment_setting +----------------- + en_US.utf8 + en_US.utf8 + en_US.utf8 +(3 rows) + +DROP FUNCTION public.segment_setting(guc text); +DROP TABLE test_lc; diff --git a/src/test/regress/greenplum_schedule b/src/test/regress/greenplum_schedule index 58a0ff6fcc2..ee560250cf7 100755 --- a/src/test/regress/greenplum_schedule +++ b/src/test/regress/greenplum_schedule @@ -195,7 +195,7 @@ test: qp_functions_in_contexts_setup # test: qp_misc_rio_join_small qp_misc_rio qp_correlated_query qp_targeted_dispatch qp_gist_indexes2 qp_gist_indexes3 qp_gist_indexes4 qp_query_execution qp_functions_in_from qp_functions_in_select qp_functions_in_subquery qp_functions_in_subquery_column qp_functions_in_subquery_constant qp_functions_in_with test: qp_misc_rio_join_small qp_correlated_query qp_targeted_dispatch qp_gist_indexes2 qp_gist_indexes3 qp_gist_indexes4 qp_query_execution qp_functions_in_from qp_functions_in_select qp_functions_in_subquery qp_functions_in_subquery_column qp_functions_in_with correlated_subquery -test: dpe qp_dpe qp_subquery qp_left_anti_semi_join qp_union_intersect qp_functions qp_functions_idf qp_regexp qp_resource_queue qp_orca_fallback +test: dpe qp_dpe qp_subquery qp_left_anti_semi_join qp_union_intersect qp_functions qp_functions_idf qp_regexp qp_resource_queue qp_orca_fallback gp_sync_lc_gucs test: olap_setup test: qp_olap_group qp_olap_group2 diff --git a/src/test/regress/sql/gp_sync_lc_gucs.sql 
b/src/test/regress/sql/gp_sync_lc_gucs.sql new file mode 100644 index 00000000000..5c4e68d1eb9 --- /dev/null +++ b/src/test/regress/sql/gp_sync_lc_gucs.sql @@ -0,0 +1,47 @@ +-- This case test that if lc related GUCs are synchronized +-- between QD and QEs. + +CREATE TABLE test_lc(c1 int8, c2 date) DISTRIBUTED BY (c1); +CREATE OR REPLACE FUNCTION public.segment_setting(guc text) + RETURNS SETOF text EXECUTE ON ALL SEGMENTS AS $$ + BEGIN RETURN NEXT pg_catalog.current_setting(guc); END + $$ LANGUAGE plpgsql; + +INSERT INTO test_lc values ('4567890123456789', '2022-08-01'); +INSERT INTO test_lc values ('-4567890123456789', '2022-09-01'); + +-- Test if lc_monetary is synced +SHOW lc_monetary; +SELECT to_char(c1, 'L9999999999999999.000') FROM test_lc; + +SET lc_monetary = 'en_US.utf8'; +SELECT to_char(c1, 'L9999999999999999.000') FROM test_lc; + +-- If the QE processes are exited for whatever the reason, +-- QD should sync the lc_monetary to the newly created QEs. +SELECT pg_terminate_backend(pid) FROM gp_dist_random('pg_stat_activity') WHERE sess_id + in (SELECT sess_id from pg_stat_activity WHERE pid in (SELECT pg_backend_pid())) ; +-- Should output the results given lc_monetary = 'en_US.utf8' +SELECT to_char(c1, 'L9999999999999999.000') FROM test_lc; + +RESET lc_monetary; + + +-- Test if lc_time is synced +SHOW lc_time; +SELECT to_char(c2, 'DD TMMON YYYY') FROM test_lc; + +SET lc_time = 'en_US.utf8'; +-- Since 'C' and 'en_US.utf8' time formatting will output the same result, and in +-- some environments, we don't know which kind of locale it supports. So we just +-- use segment_setting to checking the setting of lc_time on QEs. +SELECT segment_setting('lc_time'); + +-- If the QE processes are exited for whatever the reason, +-- QD should sync the lc_time to the newly created QEs. 
+SELECT pg_terminate_backend(pid) FROM gp_dist_random('pg_stat_activity') WHERE sess_id + in (SELECT sess_id from pg_stat_activity WHERE pid in (SELECT pg_backend_pid())) ; +SELECT segment_setting('lc_time'); + +DROP FUNCTION public.segment_setting(guc text); +DROP TABLE test_lc; From 31bf11ebde612645ea8c22866bc3d616a67e7c6e Mon Sep 17 00:00:00 2001 From: xuejing zhao <80750564+zxuejing@users.noreply.github.com> Date: Tue, 8 Nov 2022 14:33:04 +0800 Subject: [PATCH 26/46] Add a GUC to display create gang time while executing statements Add GUC gp_print_create_gang_time control whether to print information about creating gang time. We print the create gang time for both DDL and DML. If all the segDescs of a gang are from the cached pool, we regard the gang as reused. We only display the shortest and longest establish conn time and their segindexs of a gang. The info of the shortest establish conn time and the longest establish conn time is the same for 1-gang. DDL: ``` create table t(tc1 int); INFO: The shortest establish conn time: 4.48 ms, segindex: 2, The longest establish conn time: 8.13 ms, segindex: 1 set optimizer=off; INFO: (Gang) is reused ``` DML: we can use DML or explain analyze to get create gang time. ``` select * from t_create_gang_time t1, t_create_gang_time t2 where t1.tc1=2; INFO: (Slice1) is reused INFO: (Slice2) The shortest establish conn time: 4.80 ms, segindex: 0, The longest establish conn time: 4.80 ms, segindex: 0 tc1 | tc2 | tc1 | tc2 -----+-----+-----+----- (0 rows) explain analyze select * from t_create_gang_time t1, t_create_gang_time t2 where t1.tc1=2; INFO: (Slice1) is reused INFO: (Slice2) is reused QUERY PLAN ...... 
``` --- src/backend/cdb/dispatcher/cdbconn.c | 1 + src/backend/cdb/dispatcher/cdbdisp_query.c | 9 +++ src/backend/cdb/dispatcher/cdbgang.c | 61 ++++++++++++++++ src/backend/cdb/dispatcher/cdbgang_async.c | 9 ++- src/backend/utils/misc/guc_gp.c | 12 ++++ src/include/cdb/cdbconn.h | 2 + src/include/cdb/cdbgang.h | 1 + src/include/utils/unsync_guc_name.h | 1 + src/test/regress/init_file | 6 ++ src/test/regress/input/dispatch.source | 32 +++++++++ src/test/regress/output/dispatch.source | 83 ++++++++++++++++++++++ 11 files changed, 216 insertions(+), 1 deletion(-) diff --git a/src/backend/cdb/dispatcher/cdbconn.c b/src/backend/cdb/dispatcher/cdbconn.c index 74eeb0b6461..1e06bff4c5e 100644 --- a/src/backend/cdb/dispatcher/cdbconn.c +++ b/src/backend/cdb/dispatcher/cdbconn.c @@ -89,6 +89,7 @@ cdbconn_createSegmentDescriptor(struct CdbComponentDatabaseInfo *cdbinfo, int id segdbDesc->whoami = NULL; segdbDesc->identifier = identifier; segdbDesc->isWriter = isWriter; + segdbDesc->establishConnTime = 0; MemoryContextSwitchTo(oldContext); return segdbDesc; diff --git a/src/backend/cdb/dispatcher/cdbdisp_query.c b/src/backend/cdb/dispatcher/cdbdisp_query.c index 82420aa91d8..e4cdba4c099 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_query.c +++ b/src/backend/cdb/dispatcher/cdbdisp_query.c @@ -53,6 +53,7 @@ extern bool Test_print_direct_dispatch_info; +extern bool gp_print_create_gang_time; typedef struct ParamWalkerContext { plan_tree_base_prefix base; /* Required prefix for @@ -333,6 +334,8 @@ CdbDispatchSetCommand(const char *strCommand, bool cancelOnError) queryText = buildGpQueryString(pQueryParms, &queryTextLength); primaryGang = AllocateGang(ds, GANGTYPE_PRIMARY_WRITER, cdbcomponent_getCdbComponentsList()); + if (gp_print_create_gang_time) + printCreateGangTime(-1, primaryGang); /* put all idle segment to a gang so QD can send SET command to them */ AllocateGang(ds, GANGTYPE_PRIMARY_READER, formIdleSegmentIdList()); @@ -505,6 +508,8 @@ 
cdbdisp_dispatchCommandInternal(DispatchCommandQueryParms *pQueryParms, * Allocate a primary QE for every available segDB in the system. */ primaryGang = AllocateGang(ds, GANGTYPE_PRIMARY_WRITER, segments); + if (gp_print_create_gang_time) + printCreateGangTime(-1, primaryGang); Assert(primaryGang); cdbdisp_makeDispatchResults(ds, 1, flags & DF_CANCEL_ON_ERROR); @@ -1153,6 +1158,8 @@ cdbdisp_dispatchX(QueryDesc* queryDesc, } primaryGang = slice->primaryGang; + if (gp_print_create_gang_time) + printCreateGangTime(si, primaryGang); Assert(primaryGang != NULL); AssertImply(queryDesc->extended_query, primaryGang->type == GANGTYPE_PRIMARY_READER || @@ -1345,6 +1352,8 @@ CdbDispatchCopyStart(struct CdbCopy *cdbCopy, Node *stmt, int flags) * Allocate a primary QE for every available segDB in the system. */ primaryGang = AllocateGang(ds, GANGTYPE_PRIMARY_WRITER, cdbCopy->seglist); + if (gp_print_create_gang_time) + printCreateGangTime(-1, primaryGang); Assert(primaryGang); cdbdisp_makeDispatchResults(ds, 1, flags & DF_CANCEL_ON_ERROR); diff --git a/src/backend/cdb/dispatcher/cdbgang.c b/src/backend/cdb/dispatcher/cdbgang.c index 12cce038e93..f6149958b0c 100644 --- a/src/backend/cdb/dispatcher/cdbgang.c +++ b/src/backend/cdb/dispatcher/cdbgang.c @@ -1133,3 +1133,64 @@ gp_backend_info(PG_FUNCTION_ARGS) } #undef BACKENDINFO_NATTR } + +/* + * Print the time of create a gang. + * if all segDescs of the gang are cached, we regard the gang as reused. + * else we print the shortest time and the longest time of estabishing connection to the segDesc. 
+ */ +void +printCreateGangTime(int sliceId, Gang *gang) +{ + double shortestTime = -1, longestTime = -1; + int shortestSegIndex = -1, longestSegIndex = -1; + int size = gang->size; + SegmentDatabaseDescriptor *segdbDesc; + for (int i = 0; i < size; i++) + { + segdbDesc = gang->db_descriptors[i]; + /* the connection of segdbDesc is not cached */ + if (segdbDesc->establishConnTime != -1) + { + if (longestTime == -1 || segdbDesc->establishConnTime > longestTime) + { + longestTime = segdbDesc->establishConnTime; + longestSegIndex = segdbDesc->segindex; + } + if (shortestTime == -1 || segdbDesc->establishConnTime < shortestTime) + { + shortestTime = segdbDesc->establishConnTime; + shortestSegIndex = segdbDesc->segindex; + } + } + } + + /* All the segDescs are cached, and we regard this gang as reused gang. */ + if (longestTime == -1) + { + if (sliceId == -1) + { + elog(INFO, "(Gang) is reused"); + } + else + { + elog(INFO, "(Slice%d) is reused", sliceId); + } + + } + else + { + if (sliceId == -1) + { + elog(INFO, "The shortest establish conn time: %.2f ms, segindex: %d,\n" + " The longest establish conn time: %.2f ms, segindex: %d", + shortestTime, shortestSegIndex, longestTime, longestSegIndex); + } + else + { + elog(INFO, "(Slice%d) The shortest establish conn time: %.2f ms, segindex: %d,\n" + " The longest establish conn time: %.2f ms, segindex: %d", + sliceId, shortestTime, shortestSegIndex, longestTime, longestSegIndex); + } + } +} diff --git a/src/backend/cdb/dispatcher/cdbgang_async.c b/src/backend/cdb/dispatcher/cdbgang_async.c index e701e40e3ee..dcd7effd0b1 100644 --- a/src/backend/cdb/dispatcher/cdbgang_async.c +++ b/src/backend/cdb/dispatcher/cdbgang_async.c @@ -141,6 +141,8 @@ cdbgang_createGang_async(List *segments, SegmentType segmentType) if (segdbDesc->conn != NULL && !cdbconn_isBadConnection(segdbDesc)) { connStatusDone[i] = true; + /* -1 means this connection is cached */ + segdbDesc->establishConnTime = -1; successful_connections++; continue; } @@ 
-189,6 +191,8 @@ cdbgang_createGang_async(List *segments, SegmentType segmentType) gettimeofday(&startTS, NULL); fds = (struct pollfd *) palloc0(sizeof(struct pollfd) * size); + instr_time starttime, endtime; + INSTR_TIME_SET_CURRENT(starttime); /* record starttime of create gang */ for (;;) { int nready; @@ -217,7 +221,10 @@ cdbgang_createGang_async(List *segments, SegmentType segmentType) errdetail("Internal error: No motion listener port (%s)", segdbDesc->whoami))); successful_connections++; connStatusDone[i] = true; - + /* the connection of segdbDesc is established successfully, calculate the time of establishConnTime */ + INSTR_TIME_SET_CURRENT(endtime); + INSTR_TIME_SUBTRACT(endtime, starttime); + segdbDesc->establishConnTime = INSTR_TIME_GET_MILLISEC(endtime); continue; case PGRES_POLLING_READING: diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index 99f76487f83..c05e7ec9896 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -160,6 +160,7 @@ bool Debug_datumstream_read_print_varlena_info = false; bool Debug_datumstream_write_use_small_initial_buffers = false; bool gp_create_table_random_default_distribution = true; bool gp_allow_non_uniform_partitioning_ddl = true; +bool gp_print_create_gang_time = false; int dtx_phase2_retry_second = 0; bool gp_log_suboverflow_statement = false; @@ -1800,6 +1801,17 @@ struct config_bool ConfigureNamesBool_gp[] = NULL, NULL, NULL }, + { + {"gp_print_create_gang_time", PGC_USERSET, CUSTOM_OPTIONS, + gettext_noop("Allow print information about create gang time."), + NULL, + GUC_NOT_IN_SAMPLE + }, + &gp_print_create_gang_time, + false, + NULL, NULL, NULL + }, + { {"gp_recursive_cte_prototype", PGC_USERSET, DEPRECATED_OPTIONS, gettext_noop("Enable RECURSIVE clauses in CTE queries (deprecated option, use \"gp_recursive_cte\" instead)."), diff --git a/src/include/cdb/cdbconn.h b/src/include/cdb/cdbconn.h index 1c6ec0bbcbd..a55aaa987e6 100644 --- 
a/src/include/cdb/cdbconn.h +++ b/src/include/cdb/cdbconn.h @@ -53,6 +53,8 @@ typedef struct SegmentDatabaseDescriptor char *whoami; /* QE identifier for msgs */ bool isWriter; int identifier; /* unique identifier in the cdbcomponent segment pool */ + double establishConnTime; /* the time of establish connection to the segment, + * -1 means this connection is cached */ } SegmentDatabaseDescriptor; SegmentDatabaseDescriptor * diff --git a/src/include/cdb/cdbgang.h b/src/include/cdb/cdbgang.h index cffa7ecb84f..b344546e535 100644 --- a/src/include/cdb/cdbgang.h +++ b/src/include/cdb/cdbgang.h @@ -130,5 +130,6 @@ typedef struct CdbProcess typedef Gang *(*CreateGangFunc)(List *segments, SegmentType segmentType); extern Datum gp_backend_info(PG_FUNCTION_ARGS); +extern void printCreateGangTime(int sliceId, Gang *gang); #endif /* _CDBGANG_H_ */ diff --git a/src/include/utils/unsync_guc_name.h b/src/include/utils/unsync_guc_name.h index 845ba9290e8..74e0e018000 100644 --- a/src/include/utils/unsync_guc_name.h +++ b/src/include/utils/unsync_guc_name.h @@ -256,6 +256,7 @@ "gp_max_plan_size", "gp_motion_cost_per_row", "gp_predicate_pushdown_sample_rows", + "gp_print_create_gang_time", "gp_qd_hostname", "gp_qd_port", "gp_recursive_cte", diff --git a/src/test/regress/init_file b/src/test/regress/init_file index 25fff9c3318..65e735a8e00 100644 --- a/src/test/regress/init_file +++ b/src/test/regress/init_file @@ -155,4 +155,10 @@ s/.//gs m/reset enable_parallel;/ s/.//gs +# ignore establish time and segindex num +m/ The shortest establish conn time: \d+\.\d+ ms, segindex: \d+,/ +s/ The shortest establish conn time: \d+\.\d+ ms, segindex: \d+,/ The shortest establish conn time: xx\.xx ms, segindex: xx,/ +m/ The longest establish conn time: \d+\.\d+ ms, segindex: \d+/ +s/ The longest establish conn time: \d+\.\d+ ms, segindex: \d+/ The longest establish conn time: xx\.xx ms, segindex: xx/ + -- end_matchsubs diff --git a/src/test/regress/input/dispatch.source 
b/src/test/regress/input/dispatch.source index 4175d78dd25..d058b8ea619 100644 --- a/src/test/regress/input/dispatch.source +++ b/src/test/regress/input/dispatch.source @@ -557,3 +557,35 @@ select count(*) from gp_segment_configuration a, t13393 ,gp_segment_configuratio SELECT gp_inject_fault('cdbcomponent_recycle_idle_qe_error', 'reset', dbid, current_setting('gp_session_id')::int) from gp_segment_configuration where content=-1 and role='p'; drop table t13393; + +-- test for print create time for gang. +CREATE OR REPLACE FUNCTION cleanupAllGangs() RETURNS BOOL +AS '@abs_builddir@/regress@DLSUFFIX@', 'cleanupAllGangs' LANGUAGE C; + +-- cleanupAllGangs(); +select cleanupAllGangs(); + +show gp_print_create_gang_time; + +-- create a new n-gang +set gp_print_create_gang_time=on; + +set optimizer=off; + +--gang reused +create table t_create_gang_time(tc1 int,tc2 int); + +--1-gang reused +select * from t_create_gang_time where tc1=1; +explain analyze select * from t_create_gang_time where tc1=1; + +--n-gang reused and 1-gang is created. +select * from t_create_gang_time t1, t_create_gang_time t2 where t1.tc1=2; + +explain analyze select * from t_create_gang_time t1, t_create_gang_time t2 where t1.tc1=2; + +reset gp_print_create_gang_time; +reset optimizer; +drop function cleanupAllGangs(); +drop table t_create_gang_time; + diff --git a/src/test/regress/output/dispatch.source b/src/test/regress/output/dispatch.source index 185c3257de0..4c94628f706 100644 --- a/src/test/regress/output/dispatch.source +++ b/src/test/regress/output/dispatch.source @@ -906,3 +906,86 @@ SELECT gp_inject_fault('cdbcomponent_recycle_idle_qe_error', 'reset', dbid, curr (1 row) drop table t13393; +-- test for print create time for gang. 
+CREATE OR REPLACE FUNCTION cleanupAllGangs() RETURNS BOOL +AS '@abs_builddir@/regress@DLSUFFIX@', 'cleanupAllGangs' LANGUAGE C; +-- cleanupAllGangs(); +select cleanupAllGangs(); + cleanupallgangs +----------------- + t +(1 row) + +show gp_print_create_gang_time; + gp_print_create_gang_time +--------------------------- + off +(1 row) + +-- create a new n-gang +set gp_print_create_gang_time=on; +INFO: The shortest establish conn time: 8.46 ms, segindex: 0, + The longest establish conn time: 9.87 ms, segindex: 2 +set optimizer=off; +INFO: (Gang) is reused +--gang reused +create table t_create_gang_time(tc1 int,tc2 int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'tc1' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +INFO: (Gang) is reused +--1-gang reused +select * from t_create_gang_time where tc1=1; +INFO: (Slice1) is reused + tc1 | tc2 +-----+----- +(0 rows) + +explain analyze select * from t_create_gang_time where tc1=1; +INFO: (Slice1) is reused + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..393.90 rows=86 width=8) (actual time=0.611..0.618 rows=0 loops=1) + -> Seq Scan on t_create_gang_time (cost=0.00..392.75 rows=29 width=8) (never executed) + Filter: (tc1 = 1) + Optimizer: Postgres query optimizer + Planning Time: 0.187 ms + (slice0) Executor memory: 36K bytes. + (slice1) Executor memory: 36K bytes (seg1). + Memory used: 128000kB + Execution Time: 1.249 ms +(9 rows) + +--n-gang reused and 1-gang is created. 
+select * from t_create_gang_time t1, t_create_gang_time t2 where t1.tc1=2; +INFO: (Slice1) is reused +INFO: (Slice2) The shortest establish conn time: 4.80 ms, segindex: 0, + The longest establish conn time: 4.80 ms, segindex: 0 + tc1 | tc2 | tc1 | tc2 +-----+-----+-----+----- +(0 rows) + +explain analyze select * from t_create_gang_time t1, t_create_gang_time t2 where t1.tc1=2; +INFO: (Slice1) is reused +INFO: (Slice2) is reused + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10000130517.82 rows=7413210 width=16) (actual time=0.968..0.974 rows=0 loops=1) + -> Nested Loop (cost=10000000000.00..10000031675.02 rows=2471070 width=16) (never executed) + -> Broadcast Motion 1:3 (slice2; segments: 1) (cost=0.00..393.90 rows=86 width=8) (never executed) + -> Seq Scan on t_create_gang_time t1 (cost=0.00..392.75 rows=29 width=8) (never executed) + Filter: (tc1 = 2) + -> Materialize (cost=0.00..464.50 rows=28700 width=8) (never executed) + -> Seq Scan on t_create_gang_time t2 (cost=0.00..321.00 rows=28700 width=8) (never executed) + Optimizer: Postgres query optimizer + Planning Time: 0.285 ms + (slice0) Executor memory: 38K bytes. + (slice1) Executor memory: 39K bytes avg x 3 workers, 39K bytes max (seg0). + (slice2) Executor memory: 36K bytes (seg0). + Memory used: 128000kB + Execution Time: 1.948 ms +(14 rows) + +reset gp_print_create_gang_time; +reset optimizer; +drop function cleanupAllGangs(); +drop table t_create_gang_time; From 5e3dfddb45513c2965fd6bbb37c974a6bcb1707f Mon Sep 17 00:00:00 2001 From: Adam Lee Date: Tue, 22 Nov 2022 16:07:00 +0800 Subject: [PATCH 27/46] Order items in sync/unsync_guc_name.h Items in these two files should be ordered. 
--- src/include/utils/sync_guc_name.h | 9 +++++---- src/include/utils/unsync_guc_name.h | 23 ++++++++++++----------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/include/utils/sync_guc_name.h b/src/include/utils/sync_guc_name.h index 1eb15e7678c..651fc567562 100644 --- a/src/include/utils/sync_guc_name.h +++ b/src/include/utils/sync_guc_name.h @@ -8,6 +8,7 @@ * src/include/utils/sync_guc_name.h *-------------------------------------------------------------------------- */ +/* items in this file should be ordered */ "backtrace_functions", "bytea_output", "client_min_messages", @@ -42,6 +43,7 @@ "gp_ignore_error_table", "gp_indexcheck_insert", "gp_initial_bad_row_limit", + "gp_interconnect_address_type", "gp_interconnect_debug_retry_interval", "gp_interconnect_default_rtt", "gp_interconnect_fc_method", @@ -58,7 +60,6 @@ "gp_interconnect_timer_period", "gp_interconnect_transmit_timeout", "gp_interconnect_type", - "gp_interconnect_address_type", "gp_log_endpoints", "gp_log_interconnect", "gp_log_resgroup_memory", @@ -68,6 +69,7 @@ "gp_max_packet_size", "gp_max_slices", "gp_motion_slice_noop", + "gp_resgroup_debug_wait_queue", "gp_resgroup_memory_policy_auto_fixed_mem", "gp_resgroup_print_operator_memory_limits", "gp_resqueue_memory_policy_auto_fixed_mem", @@ -128,8 +130,8 @@ "optimizer_partition_selection_log", "optimizer_plan_id", "optimizer_push_group_by_below_setop_threshold", - "optimizer_xform_bind_threshold", "optimizer_samples_number", + "optimizer_xform_bind_threshold", "parallel_setup_cost", "parallel_tuple_cost", "planner_work_mem", @@ -145,16 +147,15 @@ "temp_tablespaces", "test_copy_qd_qe_split", "test_print_prefetch_joinqual", + "TimeZone", "trace_syncscan", "track_wal_io_timing", - "TimeZone", "vacuum_failsafe_age", "vacuum_multixact_failsafe_age", "verify_gpfdists_cert", "vmem_process_interrupt", "wal_debug", "work_mem", - "gp_resgroup_debug_wait_queue", "gp_appendonly_insert_files", "gp_appendonly_insert_files_tuples_range", 
"gp_random_insert_segments", diff --git a/src/include/utils/unsync_guc_name.h b/src/include/utils/unsync_guc_name.h index 74e0e018000..d0a030550a5 100644 --- a/src/include/utils/unsync_guc_name.h +++ b/src/include/utils/unsync_guc_name.h @@ -8,6 +8,7 @@ * src/include/utils/unsync_guc_name.h *-------------------------------------------------------------------------- */ +/* items in this file should be ordered */ "allow_segment_DML", "allow_system_table_mods", "enable_answer_query_using_materialized_views", @@ -173,10 +174,10 @@ "gp_appendonly_verify_block_checksums", "gp_appendonly_verify_write_block", "gp_auth_time_override", + "gp_autostats_allow_nonowner", "gp_autostats_mode", "gp_autostats_mode_in_functions", "gp_autostats_on_change_threshold", - "gp_autostats_allow_nonowner", "gp_cached_segworkers_threshold", "gp_command_count", "gp_connection_send_timeout", @@ -187,9 +188,9 @@ "gp_dbid", "gp_debug_pgproc", "gp_debug_resqueue_priority", + "gp_dispatch_keepalives_count", "gp_dispatch_keepalives_idle", "gp_dispatch_keepalives_interval", - "gp_dispatch_keepalives_count", "gp_distinct_grouping_sets_threshold", "gp_dtx_recovery_interval", "gp_dtx_recovery_prepared_period", @@ -222,6 +223,7 @@ "gp_encoding_check_locale_compatibility", "gp_external_enable_exec", "gp_external_max_segs", + "gpfdist_retry_timeout", "gp_fts_mark_mirror_down_grace_period", "gp_fts_replication_attempt_count", #ifdef USE_INTERNAL_FTS @@ -255,6 +257,7 @@ "gp_max_local_distributed_cache", "gp_max_plan_size", "gp_motion_cost_per_row", + "gp_pause_on_restore_point_replay", "gp_predicate_pushdown_sample_rows", "gp_print_create_gang_time", "gp_qd_hostname", @@ -266,12 +269,12 @@ "gp_reraise_signal", "gp_resgroup_memory_policy", "gp_resource_group_bypass", + "gp_resource_group_cpu_ceiling_enforcement", "gp_resource_group_cpu_limit", "gp_resource_group_cpu_priority", - "gp_resource_group_cpu_ceiling_enforcement", + "gp_resource_group_enable_recalculate_query_mem", "gp_resource_group_memory_limit", 
"gp_resource_group_queuing_timeout", - "gp_resource_group_enable_recalculate_query_mem", "gp_resource_manager", "gp_resqueue_memory_policy", "gp_resqueue_priority", @@ -303,7 +306,6 @@ "gp_vmem_limit_per_query", "gp_vmem_protect_limit", "gp_vmem_protect_segworker_cache_limit", - "gp_pause_on_restore_point_replay", "gp_workfile_limit_per_segment", "gp_workfile_max_entries", "gp_write_shared_snapshot", @@ -422,10 +424,9 @@ "optimizer_enable_hashagg", "optimizer_enable_hashjoin", "optimizer_enable_hashjoin_redistribute_broadcast_children", - "optimizer_enable_nljoin", "optimizer_enable_indexjoin", - "optimizer_enable_indexscan", "optimizer_enable_indexonlyscan", + "optimizer_enable_indexscan", "optimizer_enable_master_only_queries", "optimizer_enable_materialize", "optimizer_enable_mergejoin", @@ -435,25 +436,26 @@ "optimizer_enable_motions", "optimizer_enable_motions_masteronly_queries", "optimizer_enable_multiple_distinct_aggs", + "optimizer_enable_nljoin", "optimizer_enable_outerjoin_rewrite", "optimizer_enable_outerjoin_to_unionall_rewrite", "optimizer_enable_partial_index", "optimizer_enable_partition_propagation", "optimizer_enable_partition_selection", "optimizer_enable_range_predicate_dpe", + "optimizer_enable_redistribute_nestloop_loj_inner_child", + "optimizer_enable_replicated_table", "optimizer_enable_sort", "optimizer_enable_space_pruning", "optimizer_enable_streaming_material", "optimizer_enable_tablescan", - "optimizer_enable_redistribute_nestloop_loj_inner_child", - "optimizer_force_comprehensive_join_implementation", - "optimizer_enable_replicated_table", "optimizer_enforce_subplans", "optimizer_enumerate_plans", "optimizer_expand_fulljoin", "optimizer_extract_dxl_stats", "optimizer_extract_dxl_stats_all_nodes", "optimizer_force_agg_skew_avoidance", + "optimizer_force_comprehensive_join_implementation", "optimizer_force_expanded_distinct_aggs", "optimizer_force_multistage_agg", "optimizer_force_three_stage_scalar_dqa", @@ -637,7 +639,6 @@ 
"wal_writer_delay", "wal_writer_flush_after", "writable_external_table_bufsize", - "gpfdist_retry_timeout", "xid_stop_limit", "xid_warn_limit", "xmlbinary", From 89f66025e769e7abed9f8767ee0e22e49c1f0075 Mon Sep 17 00:00:00 2001 From: Zhenglong Li Date: Wed, 2 Nov 2022 09:14:33 +0800 Subject: [PATCH 28/46] add postmaster and all auxiliary processes to a same cgroup (#14256) Currently, the postmaster process will be added to the parent cgroup, and all the auxiliary processes, such as BgWriter, SysLogger, will be added to the cgroup of user.slice, if we enable the resource group. We can not control the resource usage of the cgroup of user.slice, and it's difficult to calculate the proportion between the resource usage of the parent group and child group, the Linux Cgroup document doesn't explain it either. So this PR created a new control group, named "system_group", to control the resource usage of the postmaster process and all other auxiliary processes. And this PR uses the below principle: When a process forks a child process, the new process is born into the cgroup that the forking process belongs to at the time of the operation. 
After exit, a process stays associated with the cgroup that it belonged to at the time of exit until it's reaped; --- src/backend/postmaster/postmaster.c | 6 ++ .../utils/resgroup/resgroup-ops-linux.c | 8 +-- src/backend/utils/resgroup/resgroup.c | 2 - src/bin/pg_dump/pg_dumpall.c | 5 +- src/include/catalog/pg_resgroup.dat | 2 + src/include/catalog/pg_resgroupcapability.dat | 9 ++- src/include/utils/resgroup-ops.h | 11 ++++ .../expected/resgroup/resgroup_dumpinfo.out | 2 +- .../resgroup/resgroup_name_convention.out | 2 +- .../expected/resgroup/resgroup_syntax.out | 5 +- .../expected/resgroup/resgroup_views.out | 55 +++++++++++-------- .../output/resgroup/disable_resgroup.source | 2 + .../output/resgroup/enable_resgroup.source | 5 +- .../sql/resgroup/resgroup_dumpinfo.sql | 2 +- .../sql/resgroup/resgroup_name_convention.sql | 1 + 15 files changed, 78 insertions(+), 39 deletions(-) diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 0d43872ebae..6ba0943ef70 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -159,6 +159,8 @@ #include "cdb/cdbendpoint.h" #include "cdb/ic_proxy_bgworker.h" #include "utils/metrics_utils.h" +#include "utils/resource_manager.h" +#include "utils/resgroup-ops.h" /* * This is set in backends that are handling a GPDB specific message (FTS or @@ -1587,6 +1589,10 @@ PostmasterMain(int argc, char *argv[]) */ RemovePgTempFiles(); + /* If enabled, init cgroup */ + if (IsResGroupEnabled()) + ResGroupOps_Init(); + /* * Initialize stats collection subsystem (this does NOT start the * collector process!) 
diff --git a/src/backend/utils/resgroup/resgroup-ops-linux.c b/src/backend/utils/resgroup/resgroup-ops-linux.c index 24001a9c570..8742ec2e3f7 100644 --- a/src/backend/utils/resgroup/resgroup-ops-linux.c +++ b/src/backend/utils/resgroup/resgroup-ops-linux.c @@ -1489,11 +1489,11 @@ ResGroupOps_Init(void) initCpuSet(); /* - * Put postmaster and all the children processes into the gpdb cgroup, - * otherwise auxiliary processes might get too low priority when - * gp_resource_group_cpu_priority is set to a large value + * Create the auxiliary process cgroup, and put postmaster and all the + * children processes into the group. */ - ResGroupOps_AssignGroup(RESGROUP_ROOT_ID, NULL, PostmasterPid); + ResGroupOps_CreateGroup(RESGROUP_AUXILIARY_PROCESS_GROUP_ID); + ResGroupOps_AssignGroup(RESGROUP_AUXILIARY_PROCESS_GROUP_ID, NULL, PostmasterPid); } /* Adjust GUCs for this OS group implementation */ diff --git a/src/backend/utils/resgroup/resgroup.c b/src/backend/utils/resgroup/resgroup.c index a8a830d5dcd..e806b693d78 100644 --- a/src/backend/utils/resgroup/resgroup.c +++ b/src/backend/utils/resgroup/resgroup.c @@ -597,8 +597,6 @@ InitResGroups(void) errmsg("insufficient memory available"), errhint("Increase gp_resource_group_memory_limit"))); - ResGroupOps_Init(); - if (gp_resource_group_enable_cgroup_cpuset) { /* Get cpuset from cpuset/gpdb, and transform it into bitset */ diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c index 1b2ba109c8e..a858ca4615f 100644 --- a/src/bin/pg_dump/pg_dumpall.c +++ b/src/bin/pg_dump/pg_dumpall.c @@ -842,8 +842,10 @@ dumpResGroups(PGconn *conn) */ fprintf(OPF, "ALTER RESOURCE GROUP \"admin_group\" SET cpu_rate_limit 1;\n"); fprintf(OPF, "ALTER RESOURCE GROUP \"default_group\" SET cpu_rate_limit 1;\n"); + fprintf(OPF, "ALTER RESOURCE GROUP \"system_group\" SET cpu_rate_limit 1;\n"); fprintf(OPF, "ALTER RESOURCE GROUP \"admin_group\" SET memory_limit 1;\n"); fprintf(OPF, "ALTER RESOURCE GROUP \"default_group\" SET 
memory_limit 1;\n"); + fprintf(OPF, "ALTER RESOURCE GROUP \"system_group\" SET memory_limit 1;\n"); for (i = 0; i < PQntuples(res); i++) { @@ -867,7 +869,8 @@ dumpResGroups(PGconn *conn) resetPQExpBuffer(buf); - if (0 == strcmp(groupname, "default_group") || 0 == strcmp(groupname, "admin_group")) + if (0 == strcmp(groupname, "default_group") || 0 == strcmp(groupname, "admin_group") + || 0 == strcmp(groupname, "system_group")) { /* * We can't emit CREATE statements for the built-in groups as they diff --git a/src/include/catalog/pg_resgroup.dat b/src/include/catalog/pg_resgroup.dat index d123b4de8da..4b3b70f56f9 100644 --- a/src/include/catalog/pg_resgroup.dat +++ b/src/include/catalog/pg_resgroup.dat @@ -16,5 +16,7 @@ rsgname => 'default_group', parent => '0' }, { oid => '6438', oid_symbol => 'ADMINRESGROUP_OID', rsgname => 'admin_group', parent => '0' }, +{ oid => '6448', oid_symbol => 'DEFAULTAUXILIARY_OID', + rsgname => 'system_group', parent => '0' }, ] diff --git a/src/include/catalog/pg_resgroupcapability.dat b/src/include/catalog/pg_resgroupcapability.dat index 97c6d90d278..5f8f554427b 100644 --- a/src/include/catalog/pg_resgroupcapability.dat +++ b/src/include/catalog/pg_resgroupcapability.dat @@ -13,7 +13,7 @@ [ { resgroupid => '6437', reslimittype => '1', value => '20' }, -{ resgroupid => '6437', reslimittype => '2', value => '30' }, +{ resgroupid => '6437', reslimittype => '2', value => '20' }, { resgroupid => '6437', reslimittype => '3', value => '0' }, { resgroupid => '6437', reslimittype => '4', value => '80' }, { resgroupid => '6437', reslimittype => '5', value => '0' }, @@ -26,5 +26,12 @@ { resgroupid => '6438', reslimittype => '5', value => '0' }, { resgroupid => '6438', reslimittype => '6', value => '0' }, { resgroupid => '6438', reslimittype => '7', value => '-1' }, +{ resgroupid => '6448', reslimittype => '1', value => '0' }, +{ resgroupid => '6448', reslimittype => '2', value => '10' }, +{ resgroupid => '6448', reslimittype => '3', value => '0' 
}, +{ resgroupid => '6448', reslimittype => '4', value => '0' }, +{ resgroupid => '6448', reslimittype => '5', value => '0' }, +{ resgroupid => '6448', reslimittype => '6', value => '0' }, +{ resgroupid => '6448', reslimittype => '7', value => '-1' }, ] diff --git a/src/include/utils/resgroup-ops.h b/src/include/utils/resgroup-ops.h index 5fa90ceb7ae..380c4d9dd24 100644 --- a/src/include/utils/resgroup-ops.h +++ b/src/include/utils/resgroup-ops.h @@ -32,6 +32,17 @@ typedef enum #define RESGROUP_ROOT_ID (InvalidOid) +/* + * Default cpu group for the postmaster process and its auxiliary processes, such as + * BgWriter, SysLogger, WalWriter and so on. Because those auxiliary processes are + * created at different times and it is hard to add them into the same cgroup at + * their entry points, we create a default cpu group at the beginning of database + * start. + * + * This is hard-coded, but we can't avoid it. + */ +#define RESGROUP_AUXILIARY_PROCESS_GROUP_ID 6441 + /* * Default cpuset group is a group manages the cpu cores which not belong to * any other cpuset group. 
All the processes which not belong to any cpuset diff --git a/src/test/isolation2/expected/resgroup/resgroup_dumpinfo.out b/src/test/isolation2/expected/resgroup/resgroup_dumpinfo.out index d9ab35ad819..4f4e18f2e31 100644 --- a/src/test/isolation2/expected/resgroup/resgroup_dumpinfo.out +++ b/src/test/isolation2/expected/resgroup/resgroup_dumpinfo.out @@ -11,7 +11,7 @@ CREATE CREATE FUNCTION dump_test_check() RETURNS bool as $$ import json import pg def validate(json_obj, segnum): array = json_obj.get("info") #validate segnum if len(array) != segnum: return False qd_info = [j for j in array if j["segid"] == -1][0] #validate keys keys = ["segid", "segmentsOnMaster", "loaded", "totalChunks", "freeChunks", "chunkSizeInBits", "groups"] for key in keys: if key not in qd_info: return False -groups = [g for g in qd_info["groups"] if g["group_id"] > 6438] #validate user created group if len(groups) != 1: return False group = groups[0] #validate group keys keys = ["group_id", "nRunning", "locked_for_drop", "memExpected", "memQuotaGranted", "memSharedGranted", "memQuotaUsed", "memUsage", "memSharedUsage"] for key in keys: if key not in group: return False +groups = [g for g in qd_info["groups"] if g["group_id"] > 6441] #validate user created group if len(groups) != 1: return False group = groups[0] #validate group keys keys = ["group_id", "nRunning", "locked_for_drop", "memExpected", "memQuotaGranted", "memSharedGranted", "memQuotaUsed", "memUsage", "memSharedUsage"] for key in keys: if key not in group: return False #validate waitqueue wait_queue = group["wait_queue"] if wait_queue["wait_queue_size"] != 1: return False #validate nrunning nrunning = group["nRunning"] if nrunning != 2: return False return True conn = pg.connect(dbname="postgres") diff --git a/src/test/isolation2/expected/resgroup/resgroup_name_convention.out b/src/test/isolation2/expected/resgroup/resgroup_name_convention.out index 97e03cf54c5..a29307bfe18 100644 --- 
a/src/test/isolation2/expected/resgroup/resgroup_name_convention.out +++ b/src/test/isolation2/expected/resgroup/resgroup_name_convention.out @@ -18,7 +18,7 @@ -- setup -- -CREATE OR REPLACE VIEW rg_name_view AS SELECT S.rsgname, C.concurrency FROM gp_toolkit.gp_resgroup_config C, gp_toolkit.gp_resgroup_status S WHERE C.groupid = S.groupid AND C.groupname != 'default_group' AND C.groupname != 'admin_group' ORDER BY C.groupid; +CREATE OR REPLACE VIEW rg_name_view AS SELECT S.rsgname, C.concurrency FROM gp_toolkit.gp_resgroup_config C, gp_toolkit.gp_resgroup_status S WHERE C.groupid = S.groupid AND C.groupname != 'default_group' AND C.groupname != 'admin_group' AND C.groupname != 'system_group' ORDER BY C.groupid; CREATE -- TODO: need to cleanup all existing resgroups diff --git a/src/test/isolation2/expected/resgroup/resgroup_syntax.out b/src/test/isolation2/expected/resgroup/resgroup_syntax.out index 4f293d7c8d3..cba7d0ec561 100644 --- a/src/test/isolation2/expected/resgroup/resgroup_syntax.out +++ b/src/test/isolation2/expected/resgroup/resgroup_syntax.out @@ -96,9 +96,10 @@ ERROR: resource group "rg_test_group" does not exist SELECT * FROM gp_toolkit.gp_resgroup_config; groupid | groupname | concurrency | cpu_rate_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ---------+---------------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- - 6437 | default_group | 20 | 30 | 30 | 80 | 10 | vmtracker | -1 + 6437 | default_group | 20 | 20 | 30 | 80 | 10 | vmtracker | -1 + 6441 | system_group | 0 | 10 | 0 | 0 | 0 | vmtracker | -1 6438 | admin_group | 2 | 10 | 10 | 80 | 10 | vmtracker | -1 -(2 rows) +(3 rows) -- negative diff --git a/src/test/isolation2/expected/resgroup/resgroup_views.out b/src/test/isolation2/expected/resgroup/resgroup_views.out index 9f158654515..a9c8f7a9bc7 100644 --- a/src/test/isolation2/expected/resgroup/resgroup_views.out +++ 
b/src/test/isolation2/expected/resgroup/resgroup_views.out @@ -1,7 +1,7 @@ select * from gp_toolkit.gp_resgroup_config where groupname='default_group'; groupid | groupname | concurrency | cpu_rate_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ---------+---------------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- - 6437 | default_group | 20 | 30 | 30 | 80 | 10 | vmtracker | -1 + 6437 | default_group | 20 | 20 | 30 | 80 | 10 | vmtracker | -1 (1 row) select rsgname , groupid , num_running , num_queueing , num_queued , num_executed , cpu_usage->'-1' as qd_cpu_usage , memory_usage->'-1'->'used' as qd_memory_used , memory_usage->'-1'->'shared_used' as qd_memory_shared_used from gp_toolkit.gp_resgroup_status where rsgname='default_group'; @@ -29,31 +29,38 @@ select rsgname , groupid , segment_id , cpu , memory_used , memory_shared_used f select * from gp_toolkit.gp_resgroup_config; groupid | groupname | concurrency | cpu_rate_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ---------+---------------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- - 6437 | default_group | 20 | 30 | 30 | 80 | 10 | vmtracker | -1 + 6437 | default_group | 20 | 20 | 30 | 80 | 10 | vmtracker | -1 + 6441 | system_group | -1 | 10 | 0 | 0 | 0 | vmtracker | -1 6438 | admin_group | 2 | 10 | 10 | 80 | 10 | vmtracker | -1 -(2 rows) +(3 rows) select * from gp_toolkit.gp_resgroup_status; - rsgname | groupid | num_running | num_queueing | num_queued | num_executed | total_queue_duration | cpu_usage | memory_usage 
----------------+---------+-------------+--------------+------------+--------------+----------------------+-------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - default_group | 6437 | 0 | 0 | 0 | 0 | @ 0 | {"-1":0.00, "0":0.00, "1":0.00, "2":0.00} | {"-1":{"used":0, "available":204, "quota_used":0, "quota_available":40, "quota_granted":40, "quota_proposed":40, "shared_used":0, "shared_available":164, "shared_granted":164, "shared_proposed":164}, "0":{"used":0, "available":204, "quota_used":0, "quota_available":40, "quota_granted":40, "quota_proposed":40, "shared_used":0, "shared_available":164, "shared_granted":164, "shared_proposed":164}, "1":{"used":0, "available":204, "quota_used":0, "quota_available":40, "quota_granted":40, "quota_proposed":40, "shared_used":0, "shared_available":164, "shared_granted":164, "shared_proposed":164}, "2":{"used":0, "available":204, "quota_used":0, "quota_available":40, "quota_granted":40, "quota_proposed":40, "shared_used":0, "shared_available":164, "shared_granted":164, "shared_proposed":164}} - admin_group | 6438 | 1 | 0 | 0 | 19 | @ 0 | {"-1":0.41, "0":0.18, "1":0.14, "2":0.10} | {"-1":{"used":0, "available":68, "quota_used":6, "quota_available":6, 
"quota_granted":12, "quota_proposed":12, "shared_used":0, "shared_available":56, "shared_granted":56, "shared_proposed":56}, "0":{"used":0, "available":68, "quota_used":6, "quota_available":6, "quota_granted":12, "quota_proposed":12, "shared_used":0, "shared_available":56, "shared_granted":56, "shared_proposed":56}, "1":{"used":0, "available":68, "quota_used":6, "quota_available":6, "quota_granted":12, "quota_proposed":12, "shared_used":0, "shared_available":56, "shared_granted":56, "shared_proposed":56}, "2":{"used":0, "available":68, "quota_used":6, "quota_available":6, "quota_granted":12, "quota_proposed":12, "shared_used":0, "shared_available":56, "shared_granted":56, "shared_proposed":56}} -(2 rows) + rsgname | groupid | num_running | num_queueing | num_queued | num_executed | total_queue_duration | cpu_usage | memory_usage +---------------+---------+-------------+--------------+------------+--------------+----------------------+-----------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + default_group | 6437 | 0 | 0 | 0 | 0 | @ 0 | {'-1': 0.0, '0': 0.0, '1': 0.0, '2': 0.0} | {'-1': {'used': 0, 'available': 204, 'quota_used': 0, 'quota_available': 
40, 'quota_granted': 40, 'quota_proposed': 40, 'shared_used': 0, 'shared_available': 164, 'shared_granted': 164, 'shared_proposed': 164}, '0': {'used': 0, 'available': 204, 'quota_used': 0, 'quota_available': 40, 'quota_granted': 40, 'quota_proposed': 40, 'shared_used': 0, 'shared_available': 164, 'shared_granted': 164, 'shared_proposed': 164}, '1': {'used': 0, 'available': 204, 'quota_used': 0, 'quota_available': 40, 'quota_granted': 40, 'quota_proposed': 40, 'shared_used': 0, 'shared_available': 164, 'shared_granted': 164, 'shared_proposed': 164}, '2': {'used': 0, 'available': 204, 'quota_used': 0, 'quota_available': 40, 'quota_granted': 40, 'quota_proposed': 40, 'shared_used': 0, 'shared_available': 164, 'shared_granted': 164, 'shared_proposed': 164}} + admin_group | 6438 | 1 | 0 | 0 | 19 | @ 0 | {'-1': 0.31, '0': 0.11, '1': 0.08, '2': 0.08} | {'-1': {'used': 0, 'available': 68, 'quota_used': 6, 'quota_available': 6, 'quota_granted': 12, 'quota_proposed': 12, 'shared_used': 0, 'shared_available': 56, 'shared_granted': 56, 'shared_proposed': 56}, '0': {'used': 0, 'available': 68, 'quota_used': 6, 'quota_available': 6, 'quota_granted': 12, 'quota_proposed': 12, 'shared_used': 0, 'shared_available': 56, 'shared_granted': 56, 'shared_proposed': 56}, '1': {'used': 0, 'available': 68, 'quota_used': 6, 'quota_available': 6, 'quota_granted': 12, 'quota_proposed': 12, 'shared_used': 0, 'shared_available': 56, 'shared_granted': 56, 'shared_proposed': 56}, '2': {'used': 0, 'available': 68, 'quota_used': 6, 'quota_available': 6, 'quota_granted': 12, 'quota_proposed': 12, 'shared_used': 0, 'shared_available': 56, 'shared_granted': 56, 'shared_proposed': 56}} + system_group | 6441 | 0 | 0 | 0 | 0 | @ 0 | {'-1': 0.08, '0': 0.07, '1': 0.07, '2': 0.07} | {'-1': {'used': 0, 'available': 0, 'quota_used': 0, 'quota_available': 0, 'quota_granted': 0, 'quota_proposed': 0, 'shared_used': 0, 'shared_available': 0, 'shared_granted': 0, 'shared_proposed': 0}, '0': {'used': 0, 
'available': 0, 'quota_used': 0, 'quota_available': 0, 'quota_granted': 0, 'quota_proposed': 0, 'shared_used': 0, 'shared_available': 0, 'shared_granted': 0, 'shared_proposed': 0}, '1': {'used': 0, 'available': 0, 'quota_used': 0, 'quota_available': 0, 'quota_granted': 0, 'quota_proposed': 0, 'shared_used': 0, 'shared_available': 0, 'shared_granted': 0, 'shared_proposed': 0}, '2': {'used': 0, 'available': 0, 'quota_used': 0, 'quota_available': 0, 'quota_granted': 0, 'quota_proposed': 0, 'shared_used': 0, 'shared_available': 0, 'shared_granted': 0, 'shared_proposed': 0}} +(3 rows) select * from gp_toolkit.gp_resgroup_status_per_host; - rsgname | groupid | hostname | cpu | memory_used | memory_available | memory_quota_used | memory_quota_available | memory_shared_used | memory_shared_available ----------------+---------+---------------+------+-------------+------------------+-------------------+------------------------+--------------------+------------------------- - admin_group | 6438 | nyu-vm-ubuntu | 0.83 | 1 | 271 | 24 | 24 | 0 | 224 - default_group | 6437 | nyu-vm-ubuntu | 0.00 | 0 | 816 | 0 | 160 | 0 | 656 -(2 rows) + rsgname | groupid | hostname | cpu | memory_used | memory_available | memory_quota_used | memory_quota_available | memory_shared_used | memory_shared_available +---------------+---------+----------+------+-------------+------------------+-------------------+------------------------+--------------------+------------------------- + admin_group | 6438 | zero | 0.14 | 1 | 271 | 24 | 24 | 0 | 224 + default_group | 6437 | zero | 0.00 | 0 | 816 | 0 | 160 | 0 | 656 + system_group | 6441 | zero | 0.08 | 0 | 0 | 0 | 0 | 0 | 0 +(3 rows) select * from gp_toolkit.gp_resgroup_status_per_segment; - rsgname | groupid | hostname | segment_id | cpu | memory_used | memory_available | memory_quota_used | memory_quota_available | memory_shared_used | memory_shared_available 
----------------+---------+---------------+------------+------+-------------+------------------+-------------------+------------------------+--------------------+------------------------- - admin_group | 6438 | nyu-vm-ubuntu | 0 | 0.13 | 0 | 68 | 6 | 6 | 0 | 56 - default_group | 6437 | nyu-vm-ubuntu | 1 | 0.00 | 0 | 204 | 0 | 40 | 0 | 164 - default_group | 6437 | nyu-vm-ubuntu | 0 | 0.00 | 0 | 204 | 0 | 40 | 0 | 164 - default_group | 6437 | nyu-vm-ubuntu | -1 | 0.00 | 0 | 204 | 0 | 40 | 0 | 164 - default_group | 6437 | nyu-vm-ubuntu | 2 | 0.00 | 0 | 204 | 0 | 40 | 0 | 164 - admin_group | 6438 | nyu-vm-ubuntu | 1 | 0.10 | 0 | 68 | 6 | 6 | 0 | 56 - admin_group | 6438 | nyu-vm-ubuntu | -1 | 0.26 | 1 | 67 | 6 | 6 | 0 | 56 - admin_group | 6438 | nyu-vm-ubuntu | 2 | 0.10 | 0 | 68 | 6 | 6 | 0 | 56 -(8 rows) + rsgname | groupid | hostname | segment_id | cpu | memory_used | memory_available | memory_quota_used | memory_quota_available | memory_shared_used | memory_shared_available +---------------+---------+----------+------------+------+-------------+------------------+-------------------+------------------------+--------------------+------------------------- + admin_group | 6438 | zero | -1 | 0.25 | 1 | 67 | 6 | 6 | 0 | 56 + admin_group | 6438 | zero | 0 | 0.13 | 0 | 68 | 6 | 6 | 0 | 56 + admin_group | 6438 | zero | 1 | 0.10 | 0 | 68 | 6 | 6 | 0 | 56 + admin_group | 6438 | zero | 2 | 0.10 | 0 | 68 | 6 | 6 | 0 | 56 + default_group | 6437 | zero | -1 | 0.00 | 0 | 204 | 0 | 40 | 0 | 164 + default_group | 6437 | zero | 0 | 0.00 | 0 | 204 | 0 | 40 | 0 | 164 + default_group | 6437 | zero | 1 | 0.00 | 0 | 204 | 0 | 40 | 0 | 164 + default_group | 6437 | zero | 2 | 0.00 | 0 | 204 | 0 | 40 | 0 | 164 + system_group | 6441 | zero | -1 | 0.08 | 0 | 0 | 0 | 0 | 0 | 0 + system_group | 6441 | zero | 0 | 0.06 | 0 | 0 | 0 | 0 | 0 | 0 + system_group | 6441 | zero | 1 | 0.06 | 0 | 0 | 0 | 0 | 0 | 0 + system_group | 6441 | zero | 2 | 0.06 | 0 | 0 | 0 | 0 | 0 | 0 +(12 rows) -- end_ignore diff 
--git a/src/test/isolation2/output/resgroup/disable_resgroup.source b/src/test/isolation2/output/resgroup/disable_resgroup.source index 36467bc37d9..332bafc0913 100644 --- a/src/test/isolation2/output/resgroup/disable_resgroup.source +++ b/src/test/isolation2/output/resgroup/disable_resgroup.source @@ -2,10 +2,12 @@ ! ls -d @cgroup_mnt_point@/cpu/gpdb/*/; @cgroup_mnt_point@/cpu/gpdb/6437/ @cgroup_mnt_point@/cpu/gpdb/6438/ +@cgroup_mnt_point@/cpu/gpdb/6441/ ! ls -d @cgroup_mnt_point@/cpuacct/gpdb/*/; @cgroup_mnt_point@/cpuacct/gpdb/6437/ @cgroup_mnt_point@/cpuacct/gpdb/6438/ +@cgroup_mnt_point@/cpuacct/gpdb/6441/ -- reset the GUC and restart cluster. diff --git a/src/test/isolation2/output/resgroup/enable_resgroup.source b/src/test/isolation2/output/resgroup/enable_resgroup.source index 6811af1ef51..1553ebe6419 100644 --- a/src/test/isolation2/output/resgroup/enable_resgroup.source +++ b/src/test/isolation2/output/resgroup/enable_resgroup.source @@ -73,9 +73,10 @@ DO 0: SELECT * from gp_toolkit.gp_resgroup_config; groupid | groupname | concurrency | cpu_rate_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ---------+---------------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- - 6437 | default_group | 20 | 30 | 0 | 80 | 0 | vmtracker | -1 + 6437 | default_group | 20 | 20 | 0 | 80 | 0 | vmtracker | -1 6438 | admin_group | 10 | 10 | 10 | 80 | 0 | vmtracker | -1 -(2 rows) + 6441 | system_group | 0 | 10 | 0 | 0 | 0 | vmtracker | -1 +(3 rows) -- by default admin_group has concurrency set to -1 which leads to -- very small memory quota for each resgroup slot, correct it. 
diff --git a/src/test/isolation2/sql/resgroup/resgroup_dumpinfo.sql b/src/test/isolation2/sql/resgroup/resgroup_dumpinfo.sql index 3254575cc66..c6b4c490e9a 100644 --- a/src/test/isolation2/sql/resgroup/resgroup_dumpinfo.sql +++ b/src/test/isolation2/sql/resgroup/resgroup_dumpinfo.sql @@ -23,7 +23,7 @@ def validate(json_obj, segnum): if key not in qd_info: return False - groups = [g for g in qd_info["groups"] if g["group_id"] > 6438] + groups = [g for g in qd_info["groups"] if g["group_id"] > 6441] #validate user created group if len(groups) != 1: return False diff --git a/src/test/isolation2/sql/resgroup/resgroup_name_convention.sql b/src/test/isolation2/sql/resgroup/resgroup_name_convention.sql index 662128bad35..4a1a87633b9 100644 --- a/src/test/isolation2/sql/resgroup/resgroup_name_convention.sql +++ b/src/test/isolation2/sql/resgroup/resgroup_name_convention.sql @@ -24,6 +24,7 @@ CREATE OR REPLACE VIEW rg_name_view AS WHERE C.groupid = S.groupid AND C.groupname != 'default_group' AND C.groupname != 'admin_group' + AND C.groupname != 'system_group' ORDER BY C.groupid; -- TODO: need to cleanup all existing resgroups From 5c457fbc3b684a4b34bca38ceadb725f7934c9eb Mon Sep 17 00:00:00 2001 From: FairyFar Date: Tue, 22 Nov 2022 08:43:28 +0800 Subject: [PATCH 29/46] Added a new view into gp_toolkit. (#13880) Added a new view into the resource manager tool gp_toolkit to perform the function that is used frequently: gp_toolkit.gp_resgroup_role: assigned resource group to roles. 
--- src/backend/catalog/gp_toolkit.sql | 24 +++++++++++++++++++ src/backend/catalog/gp_toolkit_test.sql | 2 ++ .../expected/resgroup/resgroup_views.out | 6 +++++ .../sql/resgroup/resgroup_views.sql | 4 ++++ 4 files changed, 36 insertions(+) diff --git a/src/backend/catalog/gp_toolkit.sql b/src/backend/catalog/gp_toolkit.sql index 915c69e55c6..5be9fe53bba 100644 --- a/src/backend/catalog/gp_toolkit.sql +++ b/src/backend/catalog/gp_toolkit.sql @@ -1833,6 +1833,30 @@ CREATE VIEW gp_toolkit.gp_resgroup_status_per_segment AS GRANT SELECT ON gp_toolkit.gp_resgroup_status_per_segment TO public; +-------------------------------------------------------------------------------- +-- @view: +-- gp_toolkit.gp_resgroup_role +-- +-- @doc: +-- Assigned resource group to roles +-- +-------------------------------------------------------------------------------- + +CREATE VIEW gp_toolkit.gp_resgroup_role +AS + SELECT + pgr.rolname AS rrrolname, + pgrg.rsgname AS rrrsgname + FROM + pg_catalog.pg_roles pgr + JOIN + pg_catalog.pg_resgroup pgrg + ON + pgr.rolresgroup = pgrg.oid + ; + +GRANT SELECT ON gp_toolkit.gp_resgroup_role TO public; + -------------------------------------------------------------------------------- -- AO/CO diagnostics functions -------------------------------------------------------------------------------- diff --git a/src/backend/catalog/gp_toolkit_test.sql b/src/backend/catalog/gp_toolkit_test.sql index 7ad658d7b96..20672a57cb5 100644 --- a/src/backend/catalog/gp_toolkit_test.sql +++ b/src/backend/catalog/gp_toolkit_test.sql @@ -96,4 +96,6 @@ select * from gp_toolkit.gp_size_of_partition_and_indexes_disk; select * from gp_toolkit.gp_size_of_schema_disk; select * from gp_toolkit.gp_size_of_database; +select * from gp_toolkit.gp_resgroup_role; + diff --git a/src/test/isolation2/expected/resgroup/resgroup_views.out b/src/test/isolation2/expected/resgroup/resgroup_views.out index a9c8f7a9bc7..bbf5aa7a61c 100644 --- 
a/src/test/isolation2/expected/resgroup/resgroup_views.out +++ b/src/test/isolation2/expected/resgroup/resgroup_views.out @@ -22,6 +22,12 @@ select rsgname , groupid , segment_id , cpu , memory_used , memory_shared_used f default_group | 6437 | -1 | 0.00 | 0 | 0 (1 row) +select * from gp_toolkit.gp_resgroup_role where rrrolname='postgres'; + rrrolname | rrrsgname +-----------+------------- + postgres | admin_group +(1 row) + -- also log the raw output of the views, if any of above tests failed it is -- easier to find out the causes with these logs. diff --git a/src/test/isolation2/sql/resgroup/resgroup_views.sql b/src/test/isolation2/sql/resgroup/resgroup_views.sql index 15cdb09948f..834d0fa8933 100644 --- a/src/test/isolation2/sql/resgroup/resgroup_views.sql +++ b/src/test/isolation2/sql/resgroup/resgroup_views.sql @@ -34,6 +34,10 @@ select rsgname where rsgname='default_group' and segment_id=-1; +select * + from gp_toolkit.gp_resgroup_role + where rrrolname='postgres'; + -- also log the raw output of the views, if any of above tests failed it is -- easier to find out the causes with these logs. 
From ef13201ac11048670a063d9ffd28d8049f319783 Mon Sep 17 00:00:00 2001 From: Zhenglong Li Date: Tue, 22 Nov 2022 15:49:53 +0800 Subject: [PATCH 30/46] fix resgroup views cases (#14511) Fix the failed pipeline due to #13880 --- src/test/isolation2/expected/resgroup/resgroup_views.out | 4 ++-- src/test/isolation2/sql/resgroup/resgroup_views.sql | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/test/isolation2/expected/resgroup/resgroup_views.out b/src/test/isolation2/expected/resgroup/resgroup_views.out index bbf5aa7a61c..a31d5524b6d 100644 --- a/src/test/isolation2/expected/resgroup/resgroup_views.out +++ b/src/test/isolation2/expected/resgroup/resgroup_views.out @@ -22,10 +22,10 @@ select rsgname , groupid , segment_id , cpu , memory_used , memory_shared_used f default_group | 6437 | -1 | 0.00 | 0 | 0 (1 row) -select * from gp_toolkit.gp_resgroup_role where rrrolname='postgres'; +select * from gp_toolkit.gp_resgroup_role where rrrolname='gpadmin'; rrrolname | rrrsgname -----------+------------- - postgres | admin_group + gpadmin | admin_group (1 row) -- also log the raw output of the views, if any of above tests failed it is diff --git a/src/test/isolation2/sql/resgroup/resgroup_views.sql b/src/test/isolation2/sql/resgroup/resgroup_views.sql index 834d0fa8933..2f751f41d44 100644 --- a/src/test/isolation2/sql/resgroup/resgroup_views.sql +++ b/src/test/isolation2/sql/resgroup/resgroup_views.sql @@ -36,7 +36,7 @@ select rsgname select * from gp_toolkit.gp_resgroup_role - where rrrolname='postgres'; + where rrrolname='gpadmin'; -- also log the raw output of the views, if any of above tests failed it is -- easier to find out the causes with these logs. From ee754fa0878e5c324f7ef19bacd72e21cbbb510e Mon Sep 17 00:00:00 2001 From: Zhenglong Li Date: Wed, 30 Nov 2022 13:37:30 +0800 Subject: [PATCH 31/46] refactor the original cgroup code and abstract the corresponding interface (#14343) This PR is the second step in refactoring the resource group. 
The first one is #14256. In this PR, we do not change any behavior of the resource group, we do not change the interface exposed to the resource manager, it just abstracts all the fundamental functions to the struct CGroupOpsRoutine, and use this abstract handler to manipulate the underlying Linux Cgroup files, there are two purposes for this: 1. make the code more readable. 2. provides the base interface for Linux Cgroup v2. The second one is our main motivation for doing this. Of course, this is a relatively large change, so it's not all done, and more details need to be fixed. --- src/backend/cdb/cdbvars.c | 9 - src/backend/cdb/dispatcher/cdbdisp_query.c | 4 +- src/backend/commands/resgroupcmds.c | 16 +- src/backend/executor/execMain.c | 4 +- src/backend/postmaster/postmaster.c | 4 +- src/backend/utils/misc/guc_gp.c | 12 +- src/backend/utils/resgroup/Makefile | 5 +- .../utils/resgroup/cgroup-ops-linux-v1.c | 1420 ++++++++++++ src/backend/utils/resgroup/cgroup.c | 660 ++++++ .../utils/resgroup/resgroup-ops-dummy.c | 263 --- .../utils/resgroup/resgroup-ops-linux.c | 2018 ----------------- src/backend/utils/resgroup/resgroup.c | 118 +- src/backend/utils/resgroup/resgroup_helper.c | 10 +- .../utils/resource_manager/resource_manager.c | 5 +- src/include/cdb/cdbvars.h | 3 + src/include/utils/cgroup-ops-v1.h | 22 + src/include/utils/cgroup.h | 235 ++ src/include/utils/resgroup-ops.h | 84 - src/include/utils/resgroup.h | 2 + src/include/utils/unsync_guc_name.h | 1 + .../resgroup/resgroup_cpu_rate_limit.source | 2 +- .../resgroup/resgroup_cpu_rate_limit.source | 2 +- 22 files changed, 2452 insertions(+), 2447 deletions(-) create mode 100644 src/backend/utils/resgroup/cgroup-ops-linux-v1.c create mode 100644 src/backend/utils/resgroup/cgroup.c delete mode 100644 src/backend/utils/resgroup/resgroup-ops-dummy.c delete mode 100644 src/backend/utils/resgroup/resgroup-ops-linux.c create mode 100644 src/include/utils/cgroup-ops-v1.h create mode 100644 src/include/utils/cgroup.h 
delete mode 100644 src/include/utils/resgroup-ops.h diff --git a/src/backend/cdb/cdbvars.c b/src/backend/cdb/cdbvars.c index 49a5b46dcbe..cf73c097cf4 100644 --- a/src/backend/cdb/cdbvars.c +++ b/src/backend/cdb/cdbvars.c @@ -32,7 +32,6 @@ #include "libpq/libpq-be.h" #include "postmaster/backoff.h" #include "utils/resource_manager.h" -#include "utils/resgroup-ops.h" #include "storage/proc.h" #include "storage/procarray.h" #include "cdb/memquota.h" @@ -527,20 +526,12 @@ gpvars_check_gp_resource_manager_policy(char **newval, void **extra, GucSource s void gpvars_assign_gp_resource_manager_policy(const char *newval, void *extra) { - /* - * Probe resgroup configurations even not in resgroup mode, - * variables like gp_resource_group_enable_cgroup_memory need to - * be properly set in all modes. - */ - ResGroupOps_Probe(); - if (newval == NULL || newval[0] == 0) Gp_resource_manager_policy = RESOURCE_MANAGER_POLICY_QUEUE; else if (!pg_strcasecmp("queue", newval)) Gp_resource_manager_policy = RESOURCE_MANAGER_POLICY_QUEUE; else if (!pg_strcasecmp("group", newval)) { - ResGroupOps_Bless(); Gp_resource_manager_policy = RESOURCE_MANAGER_POLICY_GROUP; gp_enable_resqueue_priority = false; } diff --git a/src/backend/cdb/dispatcher/cdbdisp_query.c b/src/backend/cdb/dispatcher/cdbdisp_query.c index e4cdba4c099..af72e05d9df 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_query.c +++ b/src/backend/cdb/dispatcher/cdbdisp_query.c @@ -36,7 +36,7 @@ #include "utils/faultinjector.h" #include "utils/resgroup.h" #include "utils/resource_manager.h" -#include "utils/resgroup-ops.h" +#include "utils/cgroup.h" #include "utils/session_state.h" #include "utils/typcache.h" #include "miscadmin.h" @@ -278,7 +278,7 @@ CdbDispatchPlan(struct QueryDesc *queryDesc, * We enable resource group re-calculate the query_mem on QE, and we are not in * fall back mode (use statement_mem). 
*/ - stmt->total_memory_coordinator = ResGroupOps_GetTotalMemory(); + stmt->total_memory_coordinator = getTotalMemory(); stmt->nsegments_coordinator = ResGroupGetHostPrimaryCount(); } diff --git a/src/backend/commands/resgroupcmds.c b/src/backend/commands/resgroupcmds.c index 83ffff52ee8..c0efa7be4b1 100644 --- a/src/backend/commands/resgroupcmds.c +++ b/src/backend/commands/resgroupcmds.c @@ -36,7 +36,7 @@ #include "utils/datetime.h" #include "utils/fmgroids.h" #include "utils/resgroup.h" -#include "utils/resgroup-ops.h" +#include "utils/cgroup.h" #include "utils/resource_manager.h" #include "utils/resowner.h" #include "utils/syscache.h" @@ -238,26 +238,26 @@ CreateResourceGroup(CreateResourceGroupStmt *stmt) RegisterXactCallbackOnce(createResgroupCallback, callbackCtx); /* Create os dependent part for this resource group */ - ResGroupOps_CreateGroup(groupid); + cgroupOpsRoutine->createcgroup(groupid); - ResGroupOps_SetMemoryLimit(groupid, caps.memLimit); + cgroupOpsRoutine->setmemorylimit(groupid, caps.memLimit); if (caps.cpuRateLimit != CPU_RATE_LIMIT_DISABLED) { - ResGroupOps_SetCpuRateLimit(groupid, caps.cpuRateLimit); + cgroupOpsRoutine->setcpulimit(groupid, caps.cpuRateLimit); } else if (!CpusetIsEmpty(caps.cpuset)) { EnsureCpusetIsAvailable(ERROR); - ResGroupOps_SetCpuSet(groupid, caps.cpuset); + cgroupOpsRoutine->setcpuset(groupid, caps.cpuset); /* reset default group, subtract new group cpu cores */ char defaultGroupCpuset[MaxCpuSetLength]; - ResGroupOps_GetCpuSet(DEFAULT_CPUSET_GROUP_ID, + cgroupOpsRoutine->getcpuset(DEFAULT_CPUSET_GROUP_ID, defaultGroupCpuset, MaxCpuSetLength); CpusetDifference(defaultGroupCpuset, caps.cpuset, MaxCpuSetLength); - ResGroupOps_SetCpuSet(DEFAULT_CPUSET_GROUP_ID, defaultGroupCpuset); + cgroupOpsRoutine->setcpuset(DEFAULT_CPUSET_GROUP_ID, defaultGroupCpuset); } SIMPLE_FAULT_INJECTOR("create_resource_group_fail"); } @@ -1296,7 +1296,7 @@ validateCapabilities(Relation rel, Bitmapset *bmsAll = NULL; /* Get all available cores 
*/ - ResGroupOps_GetCpuSet(RESGROUP_ROOT_ID, + cgroupOpsRoutine->getcpuset(CGROUP_ROOT_ID, cpusetAll, MaxCpuSetLength); bmsAll = CpusetToBitset(cpusetAll, MaxCpuSetLength); diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 609529bc703..83bb0007a67 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -80,7 +80,7 @@ #include "utils/workfile_mgr.h" #include "utils/faultinjector.h" #include "utils/resource_manager.h" -#include "utils/resgroup-ops.h" +#include "utils/cgroup.h" #include "catalog/pg_statistic.h" #include "catalog/pg_class.h" @@ -274,7 +274,7 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags) should_skip_operator_memory_assign = false; /* Get total system memory on the QE in MB */ - int total_memory_segment = ResGroupOps_GetTotalMemory(); + int total_memory_segment = getTotalMemory(); int nsegments_segment = ResGroupGetHostPrimaryCount(); uint64 coordinator_query_mem = queryDesc->plannedstmt->query_mem; diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 6ba0943ef70..84c68691162 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -159,8 +159,8 @@ #include "cdb/cdbendpoint.h" #include "cdb/ic_proxy_bgworker.h" #include "utils/metrics_utils.h" +#include "utils/resgroup.h" #include "utils/resource_manager.h" -#include "utils/resgroup-ops.h" /* * This is set in backends that are handling a GPDB specific message (FTS or @@ -1591,7 +1591,7 @@ PostmasterMain(int argc, char *argv[]) /* If enabled, init cgroup */ if (IsResGroupEnabled()) - ResGroupOps_Init(); + initCgroup(); /* * Initialize stats collection subsystem (this does NOT start the diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index c05e7ec9896..88aa9e57164 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -230,6 +230,7 @@ double gp_resource_group_memory_limit; bool 
gp_resource_group_bypass; bool gp_resource_group_cpu_ceiling_enforcement; bool gp_resource_group_enable_recalculate_query_mem; +bool gp_resource_group_enable_cgroup_version_two; /* Metrics collector debug GUC */ bool vmem_process_interrupt = false; @@ -2871,6 +2872,15 @@ struct config_bool ConfigureNamesBool_gp[] = NULL, NULL, NULL }, + { + {"gp_resource_group_enable_cgroup_version_two", PGC_POSTMASTER, RESOURCES, + gettext_noop("Enable linux cgroup version 2"), + NULL + }, + &gp_resource_group_enable_cgroup_version_two, + false, NULL, NULL + }, + { {"stats_queue_level", PGC_SUSET, STATS_COLLECTOR, gettext_noop("Collects resource queue-level statistics on database activity."), @@ -3236,7 +3246,7 @@ struct config_int ConfigureNamesInt_gp[] = NULL }, &gp_resource_group_cpu_priority, - 10, 1, 256, + 1, 1, 50, NULL, NULL, NULL }, diff --git a/src/backend/utils/resgroup/Makefile b/src/backend/utils/resgroup/Makefile index 88f5b7bfeba..06b72952707 100644 --- a/src/backend/utils/resgroup/Makefile +++ b/src/backend/utils/resgroup/Makefile @@ -16,9 +16,8 @@ override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) OBJS = resgroup.o resgroup_helper.o ifeq ($(PORTNAME),linux) -OBJS += resgroup-ops-linux.o -else -OBJS += resgroup-ops-dummy.o +OBJS += cgroup.o +OBJS += cgroup-ops-linux-v1.o endif include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/utils/resgroup/cgroup-ops-linux-v1.c b/src/backend/utils/resgroup/cgroup-ops-linux-v1.c new file mode 100644 index 00000000000..d32eedc2e4f --- /dev/null +++ b/src/backend/utils/resgroup/cgroup-ops-linux-v1.c @@ -0,0 +1,1420 @@ +/*------------------------------------------------------------------------- + * + * cgroup-ops-linux-v1.c + * OS dependent resource group operations - cgroup implementation + * + * Copyright (c) 2017 VMware, Inc. or its affiliates. 
+ * + * + * IDENTIFICATION + * src/backend/utils/resgroup/cgroup-ops-linux-v1.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include + +#include "cdb/cdbvars.h" +#include "miscadmin.h" +#include "utils/cgroup.h" +#include "utils/resgroup.h" +#include "utils/cgroup-ops-v1.h" +#include "utils/vmem_tracker.h" + +#ifndef __linux__ +#error cgroup is only available on linux +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static CGroupSystemInfo cgroupSystemInfoAlpha = { + 0, + "" +}; + +/* + * Interfaces for OS dependent operations. + * + * Resource group relies on OS dependent group implementation to manage + * resources like cpu usage, such as cgroup on Linux system. + * We call it OS group in below function description. + * + * So far these operations are mainly for CPU rate limitation and accounting. + */ + + +/* + * cgroup memory permission is only mandatory on 6.x and main; + * on 5.x we need to make it optional to provide backward compatibilities. + */ +#define CGROUP_MEMORY_IS_OPTIONAL (GP_VERSION_NUM < 60000) +/* + * cpuset permission is only mandatory on 6.x and main; + * on 5.x we need to make it optional to provide backward compatibilities. 
+ */ +#define CGROUP_CPUSET_IS_OPTIONAL (GP_VERSION_NUM < 60000) + + +typedef struct PermItem PermItem; +typedef struct PermList PermList; + +struct PermItem +{ + CGroupComponentType comp; + const char *prop; + int perm; +}; + +struct PermList +{ + const PermItem *items; + bool optional; + bool *presult; +}; + +#define foreach_perm_list(i, lists) \ + for ((i) = 0; (lists)[(i)].items; (i)++) + +#define foreach_perm_item(i, items) \ + for ((i) = 0; (items)[(i)].comp != CGROUP_COMPONENT_UNKNOWN; (i)++) + +#define foreach_comp_type(comp) \ + for ((comp) = CGROUP_COMPONENT_FIRST; \ + (comp) < CGROUP_COMPONENT_COUNT; \ + (comp)++) + + +/* The functions current file used */ +static void detect_component_dirs_alpha(void); +static void dump_component_dirs_alpha(void); + +static bool perm_list_check_alpha(const PermList *permlist, Oid group, bool report); +static bool check_permission_alpha(Oid group, bool report); +static bool check_cpuset_permission_alpha(Oid group, bool report); +static void check_component_hierarchy_alpha(); + +static void init_cpu_alpha(void); +static void init_cpuset_alpha(void); + +static void create_default_cpuset_group_alpha(void); +static int64 get_cfs_period_us_alpha(CGroupComponentType component); + +/* + * currentGroupIdInCGroup & oldCaps are used for reducing redundant + * file operations + */ +static Oid currentGroupIdInCGroup = InvalidOid; + +static int64 system_cfs_quota_us = -1LL; +static int64 parent_cfs_quota_us = -1LL; + +/* + * These checks should keep in sync with gpMgmt/bin/gpcheckresgroupimpl + */ +static const PermItem perm_items_cpu[] = +{ + { CGROUP_COMPONENT_CPU, "", R_OK | W_OK | X_OK }, + { CGROUP_COMPONENT_CPU, "cgroup.procs", R_OK | W_OK }, + { CGROUP_COMPONENT_CPU, "cpu.cfs_period_us", R_OK | W_OK }, + { CGROUP_COMPONENT_CPU, "cpu.cfs_quota_us", R_OK | W_OK }, + { CGROUP_COMPONENT_CPU, "cpu.shares", R_OK | W_OK }, + { CGROUP_COMPONENT_UNKNOWN, NULL, 0 } +}; +static const PermItem perm_items_cpu_acct[] = +{ + { 
CGROUP_COMPONENT_CPUACCT, "", R_OK | W_OK | X_OK }, + { CGROUP_COMPONENT_CPUACCT, "cgroup.procs", R_OK | W_OK }, + { CGROUP_COMPONENT_CPUACCT, "cpuacct.usage", R_OK }, + { CGROUP_COMPONENT_CPUACCT, "cpuacct.stat", R_OK }, + { CGROUP_COMPONENT_UNKNOWN, NULL, 0 } +}; +static const PermItem perm_items_cpuset[] = +{ + { CGROUP_COMPONENT_CPUSET, "", R_OK | W_OK | X_OK }, + { CGROUP_COMPONENT_CPUSET, "cgroup.procs", R_OK | W_OK }, + { CGROUP_COMPONENT_CPUSET, "cpuset.cpus", R_OK | W_OK }, + { CGROUP_COMPONENT_CPUSET, "cpuset.mems", R_OK | W_OK }, + { CGROUP_COMPONENT_UNKNOWN, NULL, 0 } +}; +static const PermItem perm_items_memory[] = +{ + { CGROUP_COMPONENT_MEMORY, "", R_OK | W_OK | X_OK }, + { CGROUP_COMPONENT_MEMORY, "memory.limit_in_bytes", R_OK | W_OK }, + { CGROUP_COMPONENT_MEMORY, "memory.usage_in_bytes", R_OK }, + { CGROUP_COMPONENT_UNKNOWN, NULL, 0 } +}; +static const PermItem perm_items_swap[] = +{ + { CGROUP_COMPONENT_MEMORY, "", R_OK | W_OK | X_OK }, + { CGROUP_COMPONENT_MEMORY, "memory.memsw.limit_in_bytes", R_OK | W_OK }, + { CGROUP_COMPONENT_MEMORY, "memory.memsw.usage_in_bytes", R_OK }, + { CGROUP_COMPONENT_UNKNOWN, NULL, 0 } +}; + +/* + * just for cpuset check, same as the cpuset Permlist in permlists + */ +static const PermList cpusetPermList = +{ + perm_items_cpuset, + CGROUP_CPUSET_IS_OPTIONAL, + &gp_resource_group_enable_cgroup_cpuset, +}; + +/* + * Permission groups. + */ +static const PermList permlists[] = +{ + /* + * swap permissions are optional. + * + * cgroup/memory/memory.memsw.* is only available if + * - CONFIG_MEMCG_SWAP_ENABLED=on in kernel config, or + * - swapaccount=1 in kernel cmdline. + * + * Without these interfaces the swap usage can not be limited or accounted + * via cgroup. + */ + { perm_items_swap, true, &gp_resource_group_enable_cgroup_swap }, + + /* + * memory permissions can be mandatory or optional depends on the switch. + * + * resgroup memory auditor is introduced in 6.0 devel and backport + * to 5.x branch since 5.6.1. 
To provide backward compatibilities' memory + * permissions are optional on 5.x branch. + */ + { perm_items_memory, CGROUP_MEMORY_IS_OPTIONAL, + &gp_resource_group_enable_cgroup_memory }, + + /* cpu/cpuacct permissions are mandatory */ + { perm_items_cpu, false, NULL }, + { perm_items_cpu_acct, false, NULL }, + + /* + * cpuset permissions can be mandatory or optional depends on the switch. + * + * resgroup cpuset is introduced in 6.0 devel and backport + * to 5.x branch since 5.6.1. To provide backward compatibilities cpuset + * permissions are optional on 5.x branch. + */ + { perm_items_cpuset, CGROUP_CPUSET_IS_OPTIONAL, + &gp_resource_group_enable_cgroup_cpuset}, + + { NULL, false, NULL } +}; + +static const char *getcgroupname_v1(void); +static bool probecgroup_v1(void); +static void checkcgroup_v1(void); +static void initcgroup_v1(void); +static void adjustgucs_v1(void); +static void createcgroup_v1(Oid group); +static void attachcgroup_v1(Oid group, int pid, bool is_cpuset_enabled); +static void detachcgroup_v1(Oid group, CGroupComponentType component, int fd_dir); +static void destroycgroup_v1(Oid group, bool migrate); +static int lockcgroup_v1(Oid group, CGroupComponentType component, bool block); +static void unlockcgroup_v1(int fd); +static void setcpulimit_v1(Oid group, int cpu_rate_limit); +static void setmemorylimitbychunks_v1(Oid group, int32 memory_limit_chunks); +static void setmemorylimit_v1(Oid group, int memory_limit); +static int64 getcpuusage_v1(Oid group); +static int32 getmemoryusage_v1(Oid group); +static int32 getmemorylimitchunks_v1(Oid group); +static void getcpuset_v1(Oid group, char *cpuset, int len); +static void setcpuset_v1(Oid group, const char *cpuset); +static float convertcpuusage_v1(int64 usage, int64 duration); + +/* + * Detect gpdb cgroup component dirs. + * + * Take cpu for example, by default we expect gpdb dir to locate at + * cgroup/cpu/gpdb. But we'll also check for the cgroup dirs of init process + * (pid 1), e.g. 
cgroup/cpu/custom, then we'll look for gpdb dir at + * cgroup/cpu/custom/gpdb, if it's found and has good permissions, it can be + * used instead of the default one. + * + * If any of the gpdb cgroup component dir can not be found under init process' + * cgroup dirs or has bad permissions we'll fallback all the gpdb cgroup + * component dirs to the default ones. + * + * NOTE: This auto detection will look for memory & cpuset gpdb dirs even on + * 5X. + */ +static void +detect_component_dirs_alpha(void) +{ + CGroupComponentType component; + FILE *f; + char buf[MAX_CGROUP_PATHLEN * 2]; + int maskAll = (1 << CGROUP_COMPONENT_COUNT) - 1; + int maskDetected = 0; + + f = fopen("/proc/1/cgroup", "r"); + if (!f) + goto fallback; + + /* + * format: id:comps:path, e.g.: + * + * 10:cpuset:/ + * 4:cpu,cpuacct:/ + * 1:name=systemd:/init.scope + * 0::/init.scope + */ + while (fscanf(f, "%*d:%s", buf) != EOF) + { + CGroupComponentType components[CGROUP_COMPONENT_COUNT]; + int ncomps = 0; + char *ptr; + char *tmp; + char sep = '\0'; + int i; + + /* buf is stored with "comps:path" */ + + if (buf[0] == ':') + continue; /* ignore empty comp */ + + /* split comps */ + for (ptr = buf; sep != ':'; ptr = tmp) + { + tmp = strpbrk(ptr, ":,="); + + sep = *tmp; + *tmp++ = 0; + + /* for name=comp case there is nothing to do with the name */ + if (sep == '=') + continue; + + component = getComponentType(ptr); + + if (component == CGROUP_COMPONENT_UNKNOWN) + continue; /* not used by us */ + + /* + * push the comp to the comps stack, but if the stack is already + * full (which is unlikely to happen in real world), simply ignore + * it. 
+ */ + if (ncomps < CGROUP_COMPONENT_COUNT) + components[ncomps++] = component; + } + + /* now ptr point to the path */ + Assert(strlen(ptr) < MAX_CGROUP_PATHLEN); + + /* if the path is "/" then use empty string "" instead of it */ + if (strcmp(ptr, "/") == 0) + ptr[0] = '\0'; + + /* validate and set path for the comps */ + for (i = 0; i < ncomps; i++) + { + component = components[i]; + setComponentDir(component, ptr); + + if (!validateComponentDir(component)) + goto fallback; /* dir missing or bad permissions */ + + if (maskDetected & (1 << component)) + goto fallback; /* comp are detected more than once */ + + maskDetected |= 1 << component; + } + } + + if (maskDetected != maskAll) + goto fallback; /* not all the comps are detected */ + + /* + * Dump the comp dirs for debugging? No! + * This function is executed before timezone initialization, logs are + * forbidden. + */ + + fclose(f); + return; + +fallback: + /* set the fallback dirs for all the comps */ + foreach_comp_type(component) + { + setComponentDir(component, FALLBACK_COMP_DIR); + } + + if (f) + fclose(f); +} + + +/* + * Dump comp dirs. + */ +static void +dump_component_dirs_alpha(void) +{ + CGroupComponentType component; + char path[MAX_CGROUP_PATHLEN]; + size_t path_size = sizeof(path); + + foreach_comp_type(component) + { + buildPath(CGROUP_ROOT_ID, BASEDIR_GPDB, component, "", path, path_size); + + elog(LOG, "gpdb dir for cgroup component \"%s\": %s", + getComponentName(component), path); + } +} + + +/* + * Check a list of permissions on group. 
+ * + * - if all the permissions are met then return true; + * - otherwise: + * - raise an error if report is true and permlist is not optional; + * - or return false; + */ +static bool +perm_list_check_alpha(const PermList *permlist, Oid group, bool report) +{ + char path[MAX_CGROUP_PATHLEN]; + size_t path_size = sizeof(path); + int i; + + if (group == CGROUP_ROOT_ID && permlist->presult) + *permlist->presult = false; + + foreach_perm_item(i, permlist->items) + { + CGroupComponentType component = permlist->items[i].comp; + const char *prop = permlist->items[i].prop; + int perm = permlist->items[i].perm; + + if (!buildPathSafe(group, BASEDIR_GPDB, component, prop, path, path_size)) + { + /* Buffer is not large enough for the path */ + + if (report && !permlist->optional) + { + CGROUP_CONFIG_ERROR("invalid %s name '%s': %m", + prop[0] ? "file" : "directory", + path); + } + return false; + } + + if (access(path, perm)) + { + /* No such file or directory / Permission denied */ + + if (report && !permlist->optional) + { + CGROUP_CONFIG_ERROR("can't access %s '%s': %m", + prop[0] ? "file" : "directory", + path); + } + return false; + } + } + + if (group == CGROUP_ROOT_ID && permlist->presult) + *permlist->presult = true; + + return true; +} + +/* + * Check permissions on group's cgroup dir & interface files. + * + * - if report is true then raise an error if any mandatory permission + * is not met; + */ +static bool +check_permission_alpha(Oid group, bool report) +{ + int i; + + foreach_perm_list(i, permlists) + { + const PermList *permList = &permlists[i]; + + if (!perm_list_check_alpha(permList, group, report) && !permList->optional) + return false; + } + + return true; +} + +/* + * Same as check_permission, just check cpuset dir & interface files. 
+ */ +static bool +check_cpuset_permission_alpha(Oid group, bool report) +{ + if (!gp_resource_group_enable_cgroup_cpuset) + return true; + + if (!perm_list_check_alpha(&cpusetPermList, group, report) && + !cpusetPermList.optional) + return false; + + return true; +} + +/* + * Check the mount hierarchy of cpu and cpuset subsystem. + * + * Raise an error if cpu and cpuset are mounted on the same hierarchy. + */ +static void +check_component_hierarchy_alpha() +{ + CGroupComponentType component; + FILE *f; + char buf[MAX_CGROUP_PATHLEN * 2]; + + f = fopen("/proc/1/cgroup", "r"); + if (!f) + { + CGROUP_CONFIG_ERROR("can't check component mount hierarchy \ + file '/proc/1/cgroup' doesn't exist"); + return; + } + + /* + * format: id:comps:path, e.g.: + * + * 10:cpuset:/ + * 4:cpu,cpuacct:/ + * 1:name=systemd:/init.scope + * 0::/init.scope + */ + while (fscanf(f, "%*d:%s", buf) != EOF) + { + char *ptr; + char *tmp; + char sep = '\0'; + /* mark if the line has already contained cpu or cpuset component */ + int markComp = CGROUP_COMPONENT_UNKNOWN; + + /* buf is stored with "comps:path" */ + if (buf[0] == ':') + continue; /* ignore empty comp */ + + /* split comps */ + for (ptr = buf; sep != ':'; ptr = tmp) + { + tmp = strpbrk(ptr, ":,="); + + sep = *tmp; + *tmp++ = 0; + + /* for name=comp case there is nothing to do with the name */ + if (sep == '=') + continue; + + component = getComponentType(ptr); + + if (component == CGROUP_COMPONENT_UNKNOWN) + continue; /* not used by us */ + + if (component == CGROUP_COMPONENT_CPU || component == CGROUP_COMPONENT_CPUSET) + { + if (markComp == CGROUP_COMPONENT_UNKNOWN) + markComp = component; + else + { + Assert(markComp != component); + fclose(f); + CGROUP_CONFIG_ERROR("can't mount 'cpu' and 'cpuset' on the same hierarchy"); + return; + } + } + } + } + + fclose(f); +} + +/* + * Init gpdb cpu settings. 
+ */ +static void +init_cpu_alpha(void) +{ + CGroupComponentType component = CGROUP_COMPONENT_CPU; + int64 cfs_quota_us; + int64 shares; + + /* + * CGroup promises that cfs_quota_us will never be 0, however on centos6 + * we ever noticed that it has the value 0. + */ + if (parent_cfs_quota_us <= 0LL) + { + /* + * parent cgroup is unlimited, calculate gpdb's limitation based on + * system hardware configuration. + * + * cfs_quota_us := parent.cfs_period_us * ncores * gp_resource_group_cpu_limit + */ + cfs_quota_us = system_cfs_quota_us * gp_resource_group_cpu_limit; + } + else + { + /* + * parent cgroup is also limited, then calculate gpdb's limitation + * based on it. + * + * cfs_quota_us := parent.cfs_quota_us * gp_resource_group_cpu_limit + */ + cfs_quota_us = parent_cfs_quota_us * gp_resource_group_cpu_limit; + } + + writeInt64(CGROUP_ROOT_ID, BASEDIR_GPDB, + component, "cpu.cfs_quota_us", cfs_quota_us); + + /* + * shares := parent.shares * gp_resource_group_cpu_priority + * + * We used to set a large shares (like 1024 * 50, the maximum possible + * value), it has very bad effect on overall system performance, + * especially on 1-core or 2-core low-end systems. + */ + shares = readInt64(CGROUP_ROOT_ID, BASEDIR_PARENT, component, "cpu.shares"); + shares = shares * gp_resource_group_cpu_priority; + + writeInt64(CGROUP_ROOT_ID, BASEDIR_GPDB, component, "cpu.shares", shares); +} + +/* + * Init gpdb cpuset settings. 
+ */ +static void +init_cpuset_alpha(void) +{ + CGroupComponentType component = CGROUP_COMPONENT_CPUSET; + char buffer[MaxCpuSetLength]; + + if (!gp_resource_group_enable_cgroup_cpuset) + return; + + /* + * Get cpuset.mems and cpuset.cpus values from cgroup cpuset root path, + * and set them to cpuset/gpdb/cpuset.mems and cpuset/gpdb/cpuset.cpus + * to make sure that gpdb directory configuration is same as its + * parent directory + */ + + readStr(CGROUP_ROOT_ID, BASEDIR_PARENT, component, "cpuset.mems", + buffer, sizeof(buffer)); + writeStr(CGROUP_ROOT_ID, BASEDIR_GPDB, component, "cpuset.mems", buffer); + + readStr(CGROUP_ROOT_ID, BASEDIR_PARENT, component, "cpuset.cpus", + buffer, sizeof(buffer)); + writeStr(CGROUP_ROOT_ID, BASEDIR_GPDB, component, "cpuset.cpus", buffer); + + create_default_cpuset_group_alpha(); +} + +static int64 +get_cfs_period_us_alpha(CGroupComponentType component) +{ + int64 cfs_period_us; + + /* + * calculate cpu rate limit of system. + * + * Ideally the cpu quota is calculated from parent information: + * + * system_cfs_quota_us := parent.cfs_period_us * ncores. + * + * However, on centos6 we found parent.cfs_period_us can be 0 and is not + * writable. In the other side, gpdb.cfs_period_us should be equal to + * parent.cfs_period_us because sub dirs inherit parent properties by + * default, so we read it instead. + */ + cfs_period_us = readInt64(CGROUP_ROOT_ID, BASEDIR_GPDB, + component, "cpu.cfs_period_us"); + + if (cfs_period_us == 0LL) + { + /* + * if gpdb.cfs_period_us is also 0 try to correct it by setting the + * default value 100000 (100ms). 
+ */ + writeInt64(CGROUP_ROOT_ID, BASEDIR_GPDB, + component, "cpu.cfs_period_us", DEFAULT_CPU_PERIOD_US); + + /* read again to verify the effect */ + cfs_period_us = readInt64(CGROUP_ROOT_ID, BASEDIR_GPDB, + component, "cpu.cfs_period_us"); + + if (cfs_period_us <= 0LL) + CGROUP_CONFIG_ERROR("invalid cpu.cfs_period_us value: " + INT64_FORMAT, + cfs_period_us); + } + + return cfs_period_us; +} + +/* Return the name for the OS group implementation */ +static const char * +getcgroupname_v1(void) +{ + return "cgroup"; +} + +/* + * Probe the configuration for the OS group implementation. + * + * Return true if everything is OK, or false is some requirements are not + * satisfied. + */ +static bool +probecgroup_v1(void) +{ + /* + * Ignore the error even if cgroup mount point can not be successfully + * probed, the error will be reported in checkcgroup() later. + */ + if (!getCgroupMountDir()) + return false; + + detect_component_dirs_alpha(); + + if (!check_permission_alpha(CGROUP_ROOT_ID, false)) + return false; + + return true; +} + +/* Check whether the OS group implementation is available and usable */ +static void +checkcgroup_v1(void) +{ + CGroupComponentType component = CGROUP_COMPONENT_CPU; + int64 cfs_period_us; + + /* + * We only have to do these checks and initialization once on each host, + * so only let postmaster do the job. + */ + Assert(!IsUnderPostmaster); + + /* + * We should have already detected for cgroup mount point in probecgroup(), + * it was not an error if the detection failed at that step. But once + * we call checkcgroup() we know we want to make use of cgroup then we must + * know the mount point, otherwise it's a critical error. + */ + if (!cgroupSystemInfoAlpha.cgroup_dir[0]) + CGROUP_CONFIG_ERROR("can not find cgroup mount point"); + + /* + * Check again, this time we will fail on unmet requirements. + */ + check_permission_alpha(CGROUP_ROOT_ID, true); + + /* + * Check if cpu and cpuset subsystems are mounted on the same hierarchy. 
+ * We do not allow they mount on the same hierarchy, because writing pid + * to DEFAULT_CPUSET_GROUP_ID in attachcgroup will cause the + * removal of the pid in group BASEDIR_GPDB, which will make cpu usage + * out of control. + */ + if (!CGROUP_CPUSET_IS_OPTIONAL) + check_component_hierarchy_alpha(); + + /* + * Dump the cgroup comp dirs to logs. + * Check detect_component_dirs() to know why this is not done in that function. + */ + dump_component_dirs_alpha(); + + /* + * Get some necessary system information. + * We can not do them in probecgroup() as failure is not allowed in that one. + */ + + /* get system cpu cores */ + cgroupSystemInfoAlpha.ncores = getCPUCores(); + + cfs_period_us = get_cfs_period_us_alpha(component); + system_cfs_quota_us = cfs_period_us * cgroupSystemInfoAlpha.ncores; + + /* read cpu rate limit of parent cgroup */ + parent_cfs_quota_us = readInt64(CGROUP_ROOT_ID, BASEDIR_PARENT, + component, "cpu.cfs_quota_us"); +} + +/* Initialize the OS group */ +static void +initcgroup_v1(void) +{ + init_cpu_alpha(); + init_cpuset_alpha(); + + /* + * After basic controller inited, we need to create the SYSTEM CGROUP + * which will control the postmaster and auxiliary process, such as + * BgWriter, SysLogger. + * + * We need to add it to the system cgroup before the postmaster fork + * the child process to limit the resource usage of the parent process + * and all child processes. + */ + createcgroup_v1(GPDB_SYSTEM_CGROUP); + attachcgroup_v1(GPDB_SYSTEM_CGROUP, PostmasterPid, false); +} + +/* Adjust GUCs for this OS group implementation */ +static void +adjustgucs_v1(void) +{ + /* + * cgroup cpu limitation works best when all processes have equal + * priorities, so we force all the segments and postmaster to + * work with nice=0. + * + * this function should be called before GUCs are dispatched to segments. + */ + gp_segworker_relative_priority = 0; +} + +/* + * Create the OS group for group. 
+ */ +static void +createcgroup_v1(Oid group) +{ + int retry = 0; + + if (!createDir(group, CGROUP_COMPONENT_CPU) || + !createDir(group, CGROUP_COMPONENT_CPUACCT) || + (gp_resource_group_enable_cgroup_memory && + !createDir(group, CGROUP_COMPONENT_MEMORY)) || + (gp_resource_group_enable_cgroup_cpuset && + !createDir(group, CGROUP_COMPONENT_CPUSET))) + { + CGROUP_ERROR("can't create cgroup for resource group '%d': %m", group); + } + + /* + * although the group dir is created the interface files may not be + * created yet, so we check them repeatedly until everything is ready. + */ + while (++retry <= MAX_RETRY && !check_permission_alpha(group, false)) + pg_usleep(1000); + + if (retry > MAX_RETRY) + { + /* + * still not ready after MAX_RETRY retries, might be a real error, + * raise the error. + */ + check_permission_alpha(group, true); + } + + if (gp_resource_group_enable_cgroup_cpuset) + { + /* + * Initialize cpuset.mems and cpuset.cpus values as its parent directory + */ + CGroupComponentType component = CGROUP_COMPONENT_CPUSET; + char buffer[MaxCpuSetLength]; + + readStr(CGROUP_ROOT_ID, BASEDIR_GPDB, component, "cpuset.mems", + buffer, sizeof(buffer)); + writeStr(group, BASEDIR_GPDB, component, "cpuset.mems", buffer); + + readStr(CGROUP_ROOT_ID, BASEDIR_GPDB, component, "cpuset.cpus", + buffer, sizeof(buffer)); + writeStr(group, BASEDIR_GPDB, component, "cpuset.cpus", buffer); + } +} + +/* + * Create the OS group for default cpuset group. + * default cpuset group is a special group, only take effect in cpuset + */ +static void +create_default_cpuset_group_alpha(void) +{ + CGroupComponentType component = CGROUP_COMPONENT_CPUSET; + int retry = 0; + + if (!createDir(DEFAULT_CPUSET_GROUP_ID, component)) + { + CGROUP_ERROR("can't create cpuset cgroup for resgroup '%d': %m", + DEFAULT_CPUSET_GROUP_ID); + } + + /* + * although the group dir is created the interface files may not be + * created yet, so we check them repeatedly until everything is ready. 
+ */ + while (++retry <= MAX_RETRY && + !check_cpuset_permission_alpha(DEFAULT_CPUSET_GROUP_ID, false)) + pg_usleep(1000); + + if (retry > MAX_RETRY) + { + /* + * still not ready after MAX_RETRY retries, might be a real error, + * raise the error. + */ + check_cpuset_permission_alpha(DEFAULT_CPUSET_GROUP_ID, true); + } + + /* + * Initialize cpuset.mems and cpuset.cpus in default group as its + * parent directory + */ + char buffer[MaxCpuSetLength]; + + readStr(CGROUP_ROOT_ID, BASEDIR_GPDB, component, "cpuset.mems", + buffer, sizeof(buffer)); + writeStr(DEFAULT_CPUSET_GROUP_ID, BASEDIR_GPDB, component, "cpuset.mems", buffer); + + readStr(CGROUP_ROOT_ID, BASEDIR_GPDB, component, "cpuset.cpus", + buffer, sizeof(buffer)); + writeStr(DEFAULT_CPUSET_GROUP_ID, BASEDIR_GPDB, component, "cpuset.cpus", buffer); +} + + +/* + * Assign a process to the OS group. A process can only be assigned to one + * OS group, if it's already running under other OS group then it'll be moved + * out that OS group. + * + * pid is the process id. + */ +static void +attachcgroup_v1(Oid group, int pid, bool is_cpuset_enabled) +{ + /* + * needn't write to file if the pid has already been written in. + * Unless it has not been written or the group has changed or + * cpu control mechanism has changed. + */ + if (IsUnderPostmaster && group == currentGroupIdInCGroup) + return; + + writeInt64(group, BASEDIR_GPDB, CGROUP_COMPONENT_CPU, + "cgroup.procs", pid); + writeInt64(group, BASEDIR_GPDB, CGROUP_COMPONENT_CPUACCT, + "cgroup.procs", pid); + + if (gp_resource_group_enable_cgroup_cpuset) + { + if (is_cpuset_enabled) + { + writeInt64(group, BASEDIR_GPDB, + CGROUP_COMPONENT_CPUSET, "cgroup.procs", pid); + } + else + { + /* add pid to default group */ + writeInt64(DEFAULT_CPUSET_GROUP_ID, BASEDIR_GPDB, + CGROUP_COMPONENT_CPUSET, "cgroup.procs", pid); + } + } + + /* + * Do not assign the process to cgroup/memory for now. 
+ */ + + currentGroupIdInCGroup = group; +} + + +/* + * un-assign all the processes from a cgroup. + * + * These processes will be moved to the gpdb default cgroup. + * + * This function must be called with the gpdb toplevel dir locked, + * fd_dir is the fd for this lock, on any failure fd_dir will be closed + * (and unlocked implicitly) then an error is raised. + */ +static void +detachcgroup_v1(Oid group, CGroupComponentType component, int fd_dir) +{ + char path[MAX_CGROUP_PATHLEN]; + size_t path_size = sizeof(path); + + char *buf; + size_t buf_size; + size_t buf_len = -1; + + int fdr = -1; + int fdw = -1; + + const size_t buf_delta_size = 512; + + /* + * Check an operation result on path. + * + * Operation can be open(), close(), read(), write(), etc., which must + * set the errno on error. + * + * - condition describes the expected result of the operation; + * - action is the cleanup action on failure, such as closing the fd, + * multiple actions can be specified by putting them in brackets, + * such as (op1, op2); + * - message describes what's failed; + */ +#define __CHECK(condition, action, message) do { \ + if (!(condition)) \ + { \ + /* save errno in case it's changed in actions */ \ + int err = errno; \ + action; \ + CGROUP_ERROR(message ": %s: %s", path, strerror(err)); \ + } \ +} while (0) + + buildPath(group, BASEDIR_GPDB, component, "cgroup.procs", path, path_size); + + fdr = open(path, O_RDONLY); + + __CHECK(fdr >= 0, ( close(fd_dir) ), "can't open file for read"); + + buf_len = 0; + buf_size = buf_delta_size; + buf = palloc(buf_size); + + while (1) + { + int n = read(fdr, buf + buf_len, buf_delta_size); + __CHECK(n >= 0, ( close(fdr), close(fd_dir) ), "can't read from file"); + + buf_len += n; + + if (n < buf_delta_size) + break; + + buf_size += buf_delta_size; + buf = repalloc(buf, buf_size); + } + + close(fdr); + if (buf_len == 0) + return; + + buildPath(GPDB_DEFAULT_CGROUP, BASEDIR_GPDB, component, "cgroup.procs", + path, path_size); + + fdw = 
open(path, O_WRONLY); + __CHECK(fdw >= 0, ( close(fd_dir) ), "can't open file for write"); + + char *ptr = buf; + char *end = NULL; + long pid; + + /* + * as required by cgroup, only one pid can be migrated in each single + * write() call, so we have to parse the pids from the buffer first, + * then write them one by one. + */ + while (1) + { + pid = strtol(ptr, &end, 10); + __CHECK(pid != LONG_MIN && pid != LONG_MAX, + ( close(fdw), close(fd_dir) ), + "can't parse pid"); + + if (ptr == end) + break; + + char str[22]; + sprintf(str, "%ld", pid); + int n = write(fdw, str, strlen(str)); + if (n < 0) + { + elog(LOG, "failed to migrate pid to gpdb root cgroup: pid=%ld: %m", + pid); + } + else + { + __CHECK(n == strlen(str), + ( close(fdw), close(fd_dir) ), + "can't write to file"); + } + + ptr = end; + } + + close(fdw); + +#undef __CHECK +} + + +/* + * Destroy the OS cgroup. + * + * One OS group can not be dropped if there are processes running under it, + * if migrate is true these processes will be moved out automatically. + */ +static void +destroycgroup_v1(Oid group, bool migrate) +{ + if (!deleteDir(group, CGROUP_COMPONENT_CPU, "cpu.shares", migrate, detachcgroup_v1) || + !deleteDir(group, CGROUP_COMPONENT_CPUACCT, NULL, migrate, detachcgroup_v1) || + (gp_resource_group_enable_cgroup_cpuset && + !deleteDir(group, CGROUP_COMPONENT_CPUSET, NULL, migrate, detachcgroup_v1)) || + (gp_resource_group_enable_cgroup_memory && + !deleteDir(group, CGROUP_COMPONENT_MEMORY, "memory.limit_in_bytes", migrate, detachcgroup_v1))) + { + CGROUP_ERROR("can't remove cgroup for resource group '%d': %m", group); + } +} + + +/* + * Lock the OS group. While the group is locked it won't be removed by other + * processes. + * + * This function would block if block is true, otherwise it returns with -1 + * immediately. + * + * On success, it returns a fd to the OS group, pass it to unlockcgroup_v1() + * to unlock it. 
+ */ +static int +lockcgroup_v1(Oid group, CGroupComponentType component, bool block) +{ + char path[MAX_CGROUP_PATHLEN]; + size_t path_size = sizeof(path); + + buildPath(group, BASEDIR_GPDB, component, "", path, path_size); + + return lockDir(path, block); +} + +/* + * Unblock an OS group. + * + * fd is the value returned by lockcgroup_v1(). + */ +static void +unlockcgroup_v1(int fd) +{ + if (fd >= 0) + close(fd); +} + +/* + * Set the cpu rate limit for the OS group. + * + * cpu_rate_limit should be within [0, 100]. + */ +static void +setcpulimit_v1(Oid group, int cpu_rate_limit) +{ + CGroupComponentType component = CGROUP_COMPONENT_CPU; + + /* group.shares := gpdb.shares * cpu_rate_limit */ + + int64 shares = readInt64(CGROUP_ROOT_ID, BASEDIR_GPDB, component, + "cpu.shares"); + writeInt64(group, BASEDIR_GPDB, component, + "cpu.shares", shares * cpu_rate_limit / 100); + + /* set cpu.cfs_quota_us if hard CPU enforcement is enabled */ + if (gp_resource_group_cpu_ceiling_enforcement) + { + int64 periods = get_cfs_period_us_alpha(component); + writeInt64(group, BASEDIR_GPDB, component, "cpu.cfs_quota_us", + periods * cgroupSystemInfoAlpha.ncores * cpu_rate_limit / 100); + } + else + { + writeInt64(group, BASEDIR_GPDB, component, "cpu.cfs_quota_us", -1); + } +} + + +/* + * Set the memory limit for the OS group by value. + * + * memory_limit is the limit value in chunks + * + * If cgroup supports memory swap, we will write the same limit to + * memory.memsw.limit and memory.limit. + */ +static void +setmemorylimitbychunks_v1(Oid group, int32 memory_limit_chunks) +{ + CGroupComponentType component = CGROUP_COMPONENT_MEMORY; + int64 memory_limit_in_bytes; + + if (!gp_resource_group_enable_cgroup_memory) + return; + + memory_limit_in_bytes = VmemTracker_ConvertVmemChunksToBytes(memory_limit_chunks); + + /* Is swap interfaces enabled? 
*/ + if (!gp_resource_group_enable_cgroup_swap) + { + /* No, then we only need to setup the memory limit */ + writeInt64(group, BASEDIR_GPDB, component, "memory.limit_in_bytes", + memory_limit_in_bytes); + } + else + { + /* Yes, then we have to setup both the memory and mem+swap limits */ + + int64 memory_limit_in_bytes_old; + + /* + * Memory limit should always <= mem+swap limit, then the limits + * must be set in a proper order depending on the relation between + * new and old limits. + */ + memory_limit_in_bytes_old = readInt64(group, BASEDIR_GPDB, component, + "memory.limit_in_bytes"); + + if (memory_limit_in_bytes > memory_limit_in_bytes_old) + { + /* When new value > old memory limit, write mem+swap limit first */ + writeInt64(group, BASEDIR_GPDB, component, + "memory.memsw.limit_in_bytes", memory_limit_in_bytes); + writeInt64(group, BASEDIR_GPDB, component, + "memory.limit_in_bytes", memory_limit_in_bytes); + } + else if (memory_limit_in_bytes < memory_limit_in_bytes_old) + { + /* When new value < old memory limit, write memory limit first */ + writeInt64(group, BASEDIR_GPDB, component, + "memory.limit_in_bytes", memory_limit_in_bytes); + writeInt64(group, BASEDIR_GPDB, component, + "memory.memsw.limit_in_bytes", memory_limit_in_bytes); + } + } +} + +/* + * Set the memory limit for the OS group by rate. + * + * memory_limit should be within [0, 100]. + */ +static void +setmemorylimit_v1(Oid group, int memory_limit) +{ + CGroupComponentType component = CGROUP_COMPONENT_MEMORY; + int fd; + int32 memory_limit_in_chunks; + + memory_limit_in_chunks = ResGroupGetVmemLimitChunks() * memory_limit / 100; + memory_limit_in_chunks *= ResGroupGetHostPrimaryCount(); + + fd = lockcgroup_v1(group, component, true); + setmemorylimitbychunks_v1(group, memory_limit_in_chunks); + unlockcgroup_v1(fd); +} + + +/* + * Get the cpu usage of the OS group, that is the total cpu time obtained + * by this OS group, in nano seconds. 
+ */ +static int64 +getcpuusage_v1(Oid group) +{ + CGroupComponentType component = CGROUP_COMPONENT_CPUACCT; + + return readInt64(group, BASEDIR_GPDB, component, "cpuacct.usage"); +} + +/* + * Get the memory usage of the OS group + * + * memory usage is returned in chunks + */ +static int32 +getmemoryusage_v1(Oid group) +{ + CGroupComponentType component = CGROUP_COMPONENT_MEMORY; + int64 memory_usage_in_bytes; + char *filename; + + /* Report 0 if cgroup memory is not enabled */ + if (!gp_resource_group_enable_cgroup_memory) + return 0; + + filename = gp_resource_group_enable_cgroup_swap + ? "memory.memsw.usage_in_bytes" + : "memory.usage_in_bytes"; + + memory_usage_in_bytes = readInt64(group, BASEDIR_GPDB, component, filename); + + return VmemTracker_ConvertVmemBytesToChunks(memory_usage_in_bytes); +} + +/* + * Get the memory limit of the OS group + * + * memory limit is returned in chunks + */ +static int32 +getmemorylimitchunks_v1(Oid group) +{ + CGroupComponentType component = CGROUP_COMPONENT_MEMORY; + int64 memory_limit_in_bytes; + + /* Report unlimited (max int32) if cgroup memory is not enabled */ + if (!gp_resource_group_enable_cgroup_memory) + return (int32) ((1U << 31) - 1); + + memory_limit_in_bytes = readInt64(group, BASEDIR_GPDB, + component, "memory.limit_in_bytes"); + + return VmemTracker_ConvertVmemBytesToChunks(memory_limit_in_bytes); +} + + +/* + * Get the cpuset of the OS group. + * @param group: the destination group + * @param cpuset: the str to be set + * @param len: the upper limit of the str + */ +static void +getcpuset_v1(Oid group, char *cpuset, int len) +{ + CGroupComponentType component = CGROUP_COMPONENT_CPUSET; + + if (!gp_resource_group_enable_cgroup_cpuset) + return ; + + readStr(group, BASEDIR_GPDB, component, "cpuset.cpus", cpuset, len); +} + + +/* + * Set the cpuset for the OS group. 
+ * @param group: the destination group + * @param cpuset: the value to be set + * The syntax of CPUSET is a combination of the tuples, each tuple represents + * one core number or the core numbers interval, separated by comma. + * E.g. 0,1,2-3. + */ +static void +setcpuset_v1(Oid group, const char *cpuset) +{ + CGroupComponentType component = CGROUP_COMPONENT_CPUSET; + + if (!gp_resource_group_enable_cgroup_cpuset) + return ; + + writeStr(group, BASEDIR_GPDB, component, "cpuset.cpus", cpuset); +} + + +/* + * Convert the cpu usage to percentage within the duration. + * + * usage is the delta of getcpuusage() of a duration, + * duration is in micro seconds. + * + * When fully consuming one cpu core the return value will be 100.0 . + */ +static float +convertcpuusage_v1(int64 usage, int64 duration) +{ + float percent; + + Assert(usage >= 0LL); + Assert(duration > 0LL); + + /* There should always be at least one core on the system */ + Assert(cgroupSystemInfoAlpha.ncores > 0); + + /* + * Usage is the cpu time (nano seconds) obtained by this group in the time + * duration (micro seconds), so cpu time on one core can be calculated as: + * + * usage / 1000 / duration / ncores + * + * To convert it to percentage we should multiple 100%: + * + * usage / 1000 / duration / ncores * 100% + * = usage / 10 / duration / ncores + */ + percent = usage / 10.0 / duration / cgroupSystemInfoAlpha.ncores; + + /* + * Now we have the system level percentage, however when running in a + * container with limited cpu quota we need to further scale it with + * parent. Suppose parent has 50% cpu quota and gpdb is consuming all of + * it, then we want gpdb to report the cpu usage as 100% instead of 50%. + */ + + if (parent_cfs_quota_us > 0LL) + { + /* + * Parent cgroup is also limited, scale the percentage to the one in + * parent cgroup. Do not change the expression to `percent *= ...`, + * that will lose the precision. 
+ */ + percent = percent * system_cfs_quota_us / parent_cfs_quota_us; + } + + return percent; +} + +static CGroupOpsRoutine cGroupOpsRoutineAlpha = { + .getcgroupname = getcgroupname_v1, + .probecgroup = probecgroup_v1, + .checkcgroup = checkcgroup_v1, + .initcgroup = initcgroup_v1, + .adjustgucs = adjustgucs_v1, + .createcgroup = createcgroup_v1, + .destroycgroup = destroycgroup_v1, + + .attachcgroup = attachcgroup_v1, + .detachcgroup = detachcgroup_v1, + + .lockcgroup = lockcgroup_v1, + .unlockcgroup = unlockcgroup_v1, + + .setcpulimit = setcpulimit_v1, + .getcpuusage = getcpuusage_v1, + .getcpuset = getcpuset_v1, + .setcpuset = setcpuset_v1, + + .getmemoryusage = getmemoryusage_v1, + .setmemorylimit = setmemorylimit_v1, + .getmemorylimitchunks = getmemorylimitchunks_v1, + .setmemorylimitbychunks = setmemorylimitbychunks_v1, + + .convertcpuusage = convertcpuusage_v1, +}; + +CGroupOpsRoutine *get_group_routine_alpha(void) +{ + return &cGroupOpsRoutineAlpha; +} + +CGroupSystemInfo *get_cgroup_sysinfo_alpha(void) +{ + return &cgroupSystemInfoAlpha; +} diff --git a/src/backend/utils/resgroup/cgroup.c b/src/backend/utils/resgroup/cgroup.c new file mode 100644 index 00000000000..03fd39dfc63 --- /dev/null +++ b/src/backend/utils/resgroup/cgroup.c @@ -0,0 +1,660 @@ +#include "postgres.h" + +#include + +#include "cdb/cdbvars.h" +#include "miscadmin.h" +#include "utils/cgroup.h" +#include "utils/resgroup.h" +#include "utils/vmem_tracker.h" +#include "storage/shmem.h" + +#ifndef __linux__ +#error cgroup is only available on linux +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +CGroupOpsRoutine *cgroupOpsRoutine; +CGroupSystemInfo *cgroupSystemInfo; + +/* cgroup component names. */ +const char *component_names[CGROUP_COMPONENT_COUNT] = +{ + "cpu", "cpuacct", "memory", "cpuset" +}; + +/* cgroup component dirs. 
*/ +char component_dirs[CGROUP_COMPONENT_COUNT][MAX_CGROUP_PATHLEN] = +{ + FALLBACK_COMP_DIR, FALLBACK_COMP_DIR, FALLBACK_COMP_DIR, FALLBACK_COMP_DIR +}; + + +/* + * Get the name of cgroup controller component. + */ +const char * +getComponentName(CGroupComponentType component) +{ + Assert(component > CGROUP_COMPONENT_UNKNOWN); + Assert(component < CGROUP_COMPONENT_COUNT); + + return component_names[component]; +} + + +/* + * Get the component type from the cgroup controller name. + */ +CGroupComponentType +getComponentType(const char *name) +{ + CGroupComponentType component; + + for (component = 0; component < CGROUP_COMPONENT_COUNT; component++) + if (strcmp(name, getComponentName(component)) == 0) + return component; + + return CGROUP_COMPONENT_UNKNOWN; +} + + +/* + * Get the directory of component. + */ +const char * +getComponentDir(CGroupComponentType component) +{ + Assert(component > CGROUP_COMPONENT_UNKNOWN); + Assert(component < CGROUP_COMPONENT_COUNT); + + return component_dirs[component]; +} + +/* + * Set the component dir of component. + */ +void +setComponentDir(CGroupComponentType component, const char *dir) +{ + Assert(component > CGROUP_COMPONENT_UNKNOWN); + Assert(component < CGROUP_COMPONENT_COUNT); + Assert(strlen(dir) < MAX_CGROUP_PATHLEN); + + strcpy(component_dirs[component], dir); +} + +/* + * Build path string with parameters. + * + * Will raise an exception if the path buffer is not large enough. 
+ * + * Examples (path and path_size are omitted): + * + * - buildPath(ROOT, PARENT, CPU, "" ): /sys/fs/cgroup/cpu + * - buildPath(ROOT, PARENT, CPU, "tasks"): /sys/fs/cgroup/cpu/tasks + * - buildPath(ROOT, GPDB , CPU, "tasks"): /sys/fs/cgroup/cpu/gpdb/tasks + * + * - buildPath(ROOT, PARENT, ALL, " "): /sys/fs/cgroup/ + * - buildPath(ROOT, PARENT, ALL, "tasks"): /sys/fs/cgroup/tasks + * - buildPath(ROOT, GPDB , ALL, "tasks"): /sys/fs/cgroup/gpdb/tasks + * + * - buildPath(6437, GPDB , CPU, "tasks"): /sys/fs/cgroup/cpu/gpdb/6437/tasks + * - buildPath(6437, GPDB , ALL, "tasks"): /sys/fs/cgroup/gpdb/6437/tasks + */ +void +buildPath(Oid group, + BaseDirType base, + CGroupComponentType component, + const char *filename, + char *path, + size_t path_size) +{ + bool result = buildPathSafe(group, base, component, filename, path, path_size); + + if (!result) + { + CGROUP_CONFIG_ERROR("invalid %s name '%s': %m", + filename[0] ? "file" : "directory", + path); + } +} + +/* + * Build path string with parameters. + * + * Return false if the path buffer is not large enough, errno will also be set. 
+ */ +bool +buildPathSafe(Oid group, + BaseDirType base, + CGroupComponentType component, + const char *filename, + char *path, + size_t path_size) +{ + const char *component_name = getComponentName(component); + const char *component_dir = component_name; + const char *base_dir = ""; + char group_dir[MAX_CGROUP_PATHLEN] = ""; + int len; + + Assert(cgroupSystemInfo->cgroup_dir[0] != 0); + Assert(base == BASEDIR_GPDB || base == BASEDIR_PARENT); + + if (base == BASEDIR_GPDB) + base_dir = "/gpdb"; + else + base_dir = ""; + + /* add group name to the path */ + if (group != CGROUP_ROOT_ID) + { + len = snprintf(group_dir, sizeof(group_dir), "/%u", group); + /* We are sure group_dir is large enough */ + Assert(len > 0 && len < sizeof(group_dir)); + } + + if (component != CGROUP_COMPONENT_PLAIN) + { + /* + * for cgroup v1, we need add the component name to the path, + * such as "/gpdb/cpu/...", "/gpdb/cpuset/...". + */ + len = snprintf(path, path_size, "%s/%s%s%s/%s", + cgroupSystemInfo->cgroup_dir, component_dir, base_dir, group_dir, filename); + } + else + { + /* + * for cgroup v2, we just have the top level and child level, + * don't need to care about the component. + */ + base_dir = base == BASEDIR_GPDB ? "gpdb" : ""; + len = snprintf(path, path_size, "%s/%s%s/%s", + cgroupSystemInfo->cgroup_dir, base_dir, group_dir, filename); + } + + if (len >= path_size || len < 0) + { + errno = ENAMETOOLONG; + return false; + } + + return true; +} + +/* + * Validate a component dir. + * + * Return true if it exists and has right permissions, + * otherwise return false. 
+ */ +bool +validateComponentDir(CGroupComponentType component) +{ + char path[MAX_CGROUP_PATHLEN]; + size_t path_size = sizeof(path); + + if (!buildPathSafe(CGROUP_ROOT_ID, BASEDIR_GPDB, component, "", + path, path_size)) + return false; + + return access(path, R_OK | W_OK | X_OK) == 0; +} + +/* + * Lock a dir + */ +int +lockDir(const char *path, bool block) +{ + int fd_dir; + + fd_dir = open(path, O_RDONLY); + if (fd_dir < 0) + { + if (errno == ENOENT) + /* the dir doesn't exist, nothing to do */ + return -1; + + CGROUP_ERROR("can't open dir to lock: %s: %m", path); + } + + int flags = LOCK_EX; + if (!block) + flags |= LOCK_NB; + + while (flock(fd_dir, flags)) + { + /* + * EAGAIN is not described in flock(2), + * however it does appear in practice. + */ + if (errno == EAGAIN) + continue; + + int err = errno; + close(fd_dir); + + /* + * In block mode all errors should be reported; + * In non block mode only report errors != EWOULDBLOCK. + */ + if (block || err != EWOULDBLOCK) + CGROUP_ERROR("can't lock dir: %s: %s", path, strerror(err)); + return -1; + } + + /* + * Even if we acquired the lock the dir may still been removed by other + * processes, e.g.: + * + * 1: open() + * 1: flock() -- process 1 acquire the lock + * + * 2: open() + * 2: flock() -- blocked by process 1 + * + * 1: rmdir() + * 1: close() -- process 1 released the lock + * + * 2:flock() will now return w/o error as process 2 still has a valid + * fd (reference) on the target dir, and process 2 does acquire the lock + * successfully. However, as the dir is already removed so process 2 + * shouldn't make any further operation (rmdir(), etc.) on the dir. + * + * So we check for the existence of the dir again and give up if it's + * already removed. 
+ */ + if (access(path, F_OK)) + { + /* the dir is already removed by other process, nothing to do */ + close(fd_dir); + return -1; + } + + return fd_dir; +} + +/* + * Create cgroup dir + */ +bool +createDir(Oid group, CGroupComponentType component) +{ + char path[MAX_CGROUP_PATHLEN]; + size_t path_size = sizeof(path); + + buildPath(group, BASEDIR_GPDB, component, "", path, path_size); + + if (mkdir(path, 0755) && errno != EEXIST) + return false; + + return true; +} + + +/* + * Read at most datasize bytes from a file. + */ +size_t +readData(const char *path, char *data, size_t datasize) +{ + int fd = open(path, O_RDONLY); + if (fd < 0) + elog(ERROR, "can't open file '%s': %m", path); + + ssize_t ret = read(fd, data, datasize); + + /* save errno before close() */ + int err = errno; + close(fd); + + if (ret < 0) + elog(ERROR, "can't read data from file '%s': %s", path, strerror(err)); + + return ret; +} + +/* + * Write datasize bytes to a file. + */ +void +writeData(const char *path, const char *data, size_t datasize) +{ + int fd = open(path, O_WRONLY); + if (fd < 0) + elog(ERROR, "can't open file '%s': %m", path); + + ssize_t ret = write(fd, data, datasize); + + /* save errno before close */ + int err = errno; + close(fd); + + if (ret < 0) + elog(ERROR, "can't write data to file '%s': %s", path, strerror(err)); + if (ret != datasize) + elog(ERROR, "can't write all data to file '%s'", path); +} + +/* + * Read an int64 value from a cgroup interface file. 
+ */ +int64 +readInt64(Oid group, BaseDirType base, CGroupComponentType component, + const char *filename) +{ + int64 x; + char data[MAX_INT_STRING_LEN]; + size_t data_size = sizeof(data); + char path[MAX_CGROUP_PATHLEN]; + size_t path_size = sizeof(path); + + buildPath(group, base, component, filename, path, path_size); + + readData(path, data, data_size); + + if (sscanf(data, "%lld", (long long *) &x) != 1) + CGROUP_ERROR("invalid number '%s'", data); + + return x; +} + +/* + * Write an int64 value to a cgroup interface file. + */ +void +writeInt64(Oid group, BaseDirType base, CGroupComponentType component, + const char *filename, int64 x) +{ + char data[MAX_INT_STRING_LEN]; + size_t data_size = sizeof(data); + char path[MAX_CGROUP_PATHLEN]; + size_t path_size = sizeof(path); + + buildPath(group, base, component, filename, path, path_size); + + snprintf(data, data_size, "%lld", (long long) x); + + writeData(path, data, strlen(data)); +} + +/* + * Read a string value from a cgroup interface file. + */ +void +readStr(Oid group, BaseDirType base, CGroupComponentType component, + const char *filename, char *str, int len) +{ + char data[MAX_INT_STRING_LEN]; + size_t data_size = sizeof(data); + char path[MAX_CGROUP_PATHLEN]; + size_t path_size = sizeof(path); + + buildPath(group, base, component, filename, path, path_size); + + readData(path, data, data_size); + + strlcpy(str, data, len); +} + +/* + * Write a string value to a cgroup interface file. 
+ */ +void +writeStr(Oid group, BaseDirType base, CGroupComponentType component, + const char *filename, const char *strValue) +{ + char path[MAX_CGROUP_PATHLEN]; + size_t path_size = sizeof(path); + + buildPath(group, base, component, filename, path, path_size); + + writeData(path, strValue, strlen(strValue)); +} + + +bool +deleteDir(Oid group, CGroupComponentType component, const char *filename, bool unassign, + void (*detachcgroup) (Oid group, CGroupComponentType component, int fd_dir)) +{ + + char path[MAX_CGROUP_PATHLEN]; + size_t path_size = sizeof(path); + + int retry = unassign ? 0 : MAX_RETRY - 1; + int fd_dir; + + buildPath(group, BASEDIR_GPDB, component, "", path, path_size); + + /* + * To prevent race condition between multiple processes we require a dir + * to be removed with the lock acquired first. + */ + fd_dir = lockDir(path, true); + + /* the dir is already removed */ + if (fd_dir < 0) + return true; + + /* + * Reset the corresponding control file to zero + * RG_FIXME: Can we remove this? + */ + if (filename) + writeInt64(group, BASEDIR_GPDB, component, filename, 0); + + while (++retry <= MAX_RETRY) + { + if (unassign) + detachcgroup(group, component, fd_dir); + + if (rmdir(path)) + { + int err = errno; + + if (err == EBUSY && unassign && retry < MAX_RETRY) + { + elog(DEBUG1, "can't remove dir, will retry: %s: %s", + path, strerror(err)); + pg_usleep(1000); + continue; + } + + /* + * we don't check for ENOENT again as we already acquired the lock + * on this dir and the dir still exist at that time, so if then + * it's removed by other processes then it's a bug. 
+ */ + elog(DEBUG1, "can't remove dir, ignore the error: %s: %s", + path, strerror(err)); + } + break; + } + + if (retry <= MAX_RETRY) + elog(DEBUG1, "cgroup dir '%s' removed", path); + + /* close() also releases the lock */ + close(fd_dir); + + return true; +} + + +int +getCPUCores(void) +{ + int cpucores = 0; + + /* + * cpuset ops requires _GNU_SOURCE to be defined, + * and _GNU_SOURCE is forced on in src/template/linux, + * so we assume these ops are always available on linux. + */ + cpu_set_t cpuset; + int i; + + if (sched_getaffinity(0, sizeof(cpuset), &cpuset) < 0) + CGROUP_ERROR("can't get cpu cores: %m"); + + for (i = 0; i < CPU_SETSIZE; i++) + { + if (CPU_ISSET(i, &cpuset)) + cpucores++; + } + + if (cpucores == 0) + CGROUP_ERROR("can't get cpu cores"); + + return cpucores; +} + + +/* + * Get the mount directory of cgroup, the basic method is to read the file "/proc/self/mounts". + * Normally, cgroup version 1 will return "/sys/fs/cgroup/xxx", so we need remove the "xxx", but + * version 2 do not need this. 
+ */ +bool +getCgroupMountDir() +{ + struct mntent *me; + FILE *fp; + + if (strlen(cgroupSystemInfo->cgroup_dir) != 0) + return true; + + memset(cgroupSystemInfo->cgroup_dir,'\0',sizeof(cgroupSystemInfo->cgroup_dir)); + + fp = setmntent(PROC_MOUNTS, "r"); + if (fp == NULL) + CGROUP_CONFIG_ERROR("can not open '%s' for read", PROC_MOUNTS); + + while ((me = getmntent(fp))) + { + char * p; + + if (!gp_resource_group_enable_cgroup_version_two) + { + /* For version 1, we need to find the mnt_type equals to "cgroup" */ + if (strcmp(me->mnt_type, "cgroup")) + continue; + + strncpy(cgroupSystemInfo->cgroup_dir, me->mnt_dir, sizeof(cgroupSystemInfo->cgroup_dir) - 1); + + p = strrchr(cgroupSystemInfo->cgroup_dir, '/'); + + if (p == NULL) + CGROUP_CONFIG_ERROR("cgroup mount point parse error: %s", cgroupSystemInfo->cgroup_dir); + else + *p = 0; + } + else + { + /* For version 2, we need to find the mnt_type equals to "cgroup2" */ + if (strcmp(me->mnt_type, "cgroup2")) + continue; + + strncpy(cgroupSystemInfo->cgroup_dir, me->mnt_dir, sizeof(cgroupSystemInfo->cgroup_dir)); + } + + break; + } + + endmntent(fp); + + return strlen(cgroupSystemInfo->cgroup_dir) != 0; +} + +/* get vm.overcommit_ratio */ +static int +getOvercommitRatio(void) +{ + int ratio; + char data[MAX_INT_STRING_LEN]; + size_t datasize = sizeof(data); + const char *path = "/proc/sys/vm/overcommit_ratio"; + + readData(path, data, datasize); + + if (sscanf(data, "%d", &ratio) != 1) + elog(ERROR, "invalid number '%s' in '%s'", data, path); + + return ratio; +} + +/* get cgroup ram and swap (in Byte) */ +static void +getCgMemoryInfo(uint64 *cgram, uint64 *cgmemsw) +{ + CGroupComponentType component = CGROUP_COMPONENT_MEMORY; + + *cgram = readInt64(CGROUP_ROOT_ID, BASEDIR_PARENT, + component, "memory.limit_in_bytes"); + + if (gp_resource_group_enable_cgroup_swap) + { + *cgmemsw = readInt64(CGROUP_ROOT_ID, BASEDIR_PARENT, + component, "memory.memsw.limit_in_bytes"); + } + else + { + elog(DEBUG1, "swap memory is 
unlimited"); + *cgmemsw = (uint64) -1LL; + } +} + +/* get total ram and total swap (in Byte) from sysinfo */ +static void +getMemoryInfo(unsigned long *ram, unsigned long *swap) +{ + struct sysinfo info; + if (sysinfo(&info) < 0) + elog(ERROR, "can't get memory information: %m"); + *ram = info.totalram; + *swap = info.totalswap; +} + +int +getTotalMemory(void) +{ + unsigned long ram, swap, total; + int overcommitRatio; + uint64 cgram, cgmemsw; + uint64 memsw; + uint64 outTotal; + + overcommitRatio = getOvercommitRatio(); + getMemoryInfo(&ram, &swap); + /* Get sysinfo total ram and swap size. */ + memsw = ram + swap; + outTotal = swap + ram * overcommitRatio / 100; + getCgMemoryInfo(&cgram, &cgmemsw); + ram = Min(ram, cgram); + /* + * In the case that total ram and swap read from sysinfo is larger than + * from cgroup, ram and swap must both be limited, otherwise swap must + * not be limited (we can safely use the value from sysinfo as swap size). + */ + if (cgmemsw < memsw) + swap = cgmemsw - ram; + /* + * If it is in a container, the total memory is limited by both the total + * memory outside and the memsw of the container. + */ + total = Min(outTotal, swap + ram); + return total >> BITS_IN_MB; +} diff --git a/src/backend/utils/resgroup/resgroup-ops-dummy.c b/src/backend/utils/resgroup/resgroup-ops-dummy.c deleted file mode 100644 index 533c13b20f6..00000000000 --- a/src/backend/utils/resgroup/resgroup-ops-dummy.c +++ /dev/null @@ -1,263 +0,0 @@ -/*------------------------------------------------------------------------- - * - * resgroup-ops-dummy.c - * OS dependent resource group operations - dummy implementation - * - * Copyright (c) 2017 VMware, Inc. or its affiliates. 
- * - * - * IDENTIFICATION - * src/backend/utils/resgroup/resgroup-ops-dummy.c - * - *------------------------------------------------------------------------- - */ - -#include "postgres.h" - -#include "utils/resgroup.h" -#include "utils/resgroup-ops.h" - -/* - * Interfaces for OS dependent operations. - * - * Resource group relies on OS dependent group implementation to manage - * resources like cpu usage, such as cgroup on Linux system. - * We call it OS group in below function description. - * - * So far these operations are mainly for CPU rate limitation and accounting. - */ - -#define unsupported_system() \ - elog(WARNING, "resource group is not supported on this system") - -/* Return the name for the OS group implementation */ -const char * -ResGroupOps_Name(void) -{ - return "unsupported"; -} - -/* - * Probe the configuration for the OS group implementation. - * - * Return true if everything is OK, or false is some requirements are not - * satisfied. Will not fail in either case. - */ -bool -ResGroupOps_Probe(void) -{ - return false; -} - -/* Check whether the OS group implementation is available and useable */ -void -ResGroupOps_Bless(void) -{ - unsupported_system(); -} - -/* Initialize the OS group */ -void -ResGroupOps_Init(void) -{ - unsupported_system(); -} - -/* Adjust GUCs for this OS group implementation */ -void -ResGroupOps_AdjustGUCs(void) -{ - unsupported_system(); -} - -/* - * Create the OS group for group. - */ -void -ResGroupOps_CreateGroup(Oid group) -{ - unsupported_system(); -} - -/* - * Destroy the OS group for group. - * - * One OS group can not be dropped if there are processes running under it, - * if migrate is true these processes will be moved out automatically. - */ -void -ResGroupOps_DestroyGroup(Oid group, bool migrate) -{ - unsupported_system(); -} - -/* - * Assign a process to the OS group. A process can only be assigned to one - * OS group, if it's already running under other OS group then it'll be moved - * out that OS group. 
- * - * pid is the process id. - */ -void -ResGroupOps_AssignGroup(Oid group, ResGroupCaps *caps, int pid) -{ - unsupported_system(); -} - -/* - * Lock the OS group. While the group is locked it won't be removed by other - * processes. - * - * This function would block if block is true, otherwise it return with -1 - * immediately. - * - * On success it return a fd to the OS group, pass it to - * ResGroupOps_UnLockGroup() to unlock it. - */ -int -ResGroupOps_LockGroup(Oid group, ResGroupCompType comp, bool block) -{ - unsupported_system(); - return -1; -} - -/* - * Unblock a OS group. - * - * fd is the value returned by ResGroupOps_LockGroup(). - */ -void -ResGroupOps_UnLockGroup(Oid group, int fd) -{ - unsupported_system(); -} - -/* - * Set the cpu rate limit for the OS group. - * - * cpu_rate_limit should be within [0, 100]. - */ -void -ResGroupOps_SetCpuRateLimit(Oid group, int cpu_rate_limit) -{ - unsupported_system(); -} - -/* - * Set the memory limit for the OS group by rate. - * - * memory_limit should be within [0, 100]. - */ -void -ResGroupOps_SetMemoryLimit(Oid group, int memory_limit) -{ - unsupported_system(); -} - -/* - * Set the memory limit for the OS group by value. - * - * memory_limit is the limit value in chunks - */ -void -ResGroupOps_SetMemoryLimitByValue(Oid group, int32 memory_limit) -{ - unsupported_system(); -} - -/* - * Get the cpu usage of the OS group, that is the total cpu time obtained - * by this OS group, in nano seconds. - */ -int64 -ResGroupOps_GetCpuUsage(Oid group) -{ - unsupported_system(); - return 0; -} - -/* - * Get the memory usage of the OS group - * - * memory usage is returned in chunks - */ -int32 -ResGroupOps_GetMemoryUsage(Oid group) -{ - unsupported_system(); - return 0; -} - -/* - * Get the memory limit of the OS group - * - * memory limit is returned in chunks - */ -int32 -ResGroupOps_GetMemoryLimit(Oid group) -{ - unsupported_system(); - return 0; -} - -/* - * Get the count of cpu cores on the system. 
- */ -int -ResGroupOps_GetCpuCores(void) -{ - unsupported_system(); - return 1; -} - -/* - * Get the total memory on the system. - * (total RAM * overcommit_ratio + total Swap) - */ -int -ResGroupOps_GetTotalMemory(void) -{ - unsupported_system(); - return 0; -} - -/* - * Set the cpuset for the OS group. - * @param group: the destination group - * @param cpuset: the value to be set - * The syntax of CPUSET is a combination of the tuples, each tuple represents - * one core number or the core numbers interval, separated by comma. - * E.g. 0,1,2-3. - */ -void -ResGroupOps_SetCpuSet(Oid group, const char *cpuset) -{ - unsupported_system(); -} - -/* - * Get the cpuset of the OS group. - * @param group: the destination group - * @param cpuset: the str to be set - * @param len: the upper limit of the str - */ -void -ResGroupOps_GetCpuSet(Oid group, char *cpuset, int len) -{ - unsupported_system(); -} - -/* - * Convert the cpu usage to percentage within the duration. - * - * usage is the delta of GetCpuUsage() of a duration, - * duration is in micro seconds. - * - * When fully consuming one cpu core the return value will be 100.0 . - */ -float -ResGroupOps_ConvertCpuUsageToPercent(int64 usage, int64 duration) -{ - unsupported_system(); - return 0.0; -} diff --git a/src/backend/utils/resgroup/resgroup-ops-linux.c b/src/backend/utils/resgroup/resgroup-ops-linux.c deleted file mode 100644 index 8742ec2e3f7..00000000000 --- a/src/backend/utils/resgroup/resgroup-ops-linux.c +++ /dev/null @@ -1,2018 +0,0 @@ -/*------------------------------------------------------------------------- - * - * resgroup-ops-linux.c - * OS dependent resource group operations - cgroup implementation - * - * Copyright (c) 2017 VMware, Inc. or its affiliates. 
- * - * - * IDENTIFICATION - * src/backend/utils/resgroup/resgroup-ops-linux.c - * - *------------------------------------------------------------------------- - */ - -#include "postgres.h" - -#include - -#include "cdb/cdbvars.h" -#include "miscadmin.h" -#include "utils/resgroup.h" -#include "utils/resgroup-ops.h" -#include "utils/vmem_tracker.h" - -#ifndef __linux__ -#error cgroup is only available on linux -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Interfaces for OS dependent operations. - * - * Resource group relies on OS dependent group implementation to manage - * resources like cpu usage, such as cgroup on Linux system. - * We call it OS group in below function description. - * - * So far these operations are mainly for CPU rate limitation and accounting. - */ - -#define CGROUP_ERROR(...) elog(ERROR, __VA_ARGS__) -#define CGROUP_CONFIG_ERROR(...) \ - CGROUP_ERROR("cgroup is not properly configured: " __VA_ARGS__) - -#define FALLBACK_COMP_DIR "" -#define PROC_MOUNTS "/proc/self/mounts" -#define MAX_INT_STRING_LEN 20 -#define MAX_RETRY 10 - -/* - * cgroup memory permission is only mandatory on 6.x and master; - * on 5.x we need to make it optional to provide backward compatibilities. - */ -#define CGROUP_MEMORY_IS_OPTIONAL (GP_VERSION_NUM < 60000) -/* - * cpuset permission is only mandatory on 6.x and master; - * on 5.x we need to make it optional to provide backward compatibilities. 
- */ -#define CGROUP_CPUSET_IS_OPTIONAL (GP_VERSION_NUM < 60000) - -typedef enum BaseType BaseType; -typedef struct PermItem PermItem; -typedef struct PermList PermList; - -enum BaseType -{ - BASETYPE_GPDB, /* translate to "/gpdb" */ - BASETYPE_PARENT, /* translate to "" */ -}; - -struct PermItem -{ - ResGroupCompType comp; - const char *prop; - int perm; -}; - -struct PermList -{ - const PermItem *items; - bool optional; - bool *presult; -}; - -#define foreach_perm_list(i, lists) \ - for ((i) = 0; (lists)[(i)].items; (i)++) - -#define foreach_perm_item(i, items) \ - for ((i) = 0; (items)[(i)].comp != RESGROUP_COMP_TYPE_UNKNOWN; (i)++) - -#define foreach_comp_type(comp) \ - for ((comp) = RESGROUP_COMP_TYPE_FIRST; \ - (comp) < RESGROUP_COMP_TYPE_COUNT; \ - (comp)++) - -static const char *compGetName(ResGroupCompType comp); -static ResGroupCompType compByName(const char *name); -static const char *compGetDir(ResGroupCompType comp); -static void compSetDir(ResGroupCompType comp, const char *dir); -static void detectCompDirs(void); -static bool validateCompDir(ResGroupCompType comp); -static void dumpCompDirs(void); - -static char *buildPath(Oid group, BaseType base, ResGroupCompType comp, const char *prop, char *path, size_t pathsize); -static char *buildPathSafe(Oid group, BaseType base, ResGroupCompType comp, const char *prop, char *path, size_t pathsize); -static int lockDir(const char *path, bool block); -static void unassignGroup(Oid group, ResGroupCompType comp, int fddir); -static bool createDir(Oid group, ResGroupCompType comp); -static bool removeDir(Oid group, ResGroupCompType comp, const char *prop, bool unassign); -static int getCpuCores(void); -static size_t readData(const char *path, char *data, size_t datasize); -static void writeData(const char *path, const char *data, size_t datasize); -static int64 readInt64(Oid group, BaseType base, ResGroupCompType comp, const char *prop); -static void writeInt64(Oid group, BaseType base, ResGroupCompType comp, 
const char *prop, int64 x); -static void readStr(Oid group, BaseType base, ResGroupCompType comp, const char *prop, char *str, int len); -static void writeStr(Oid group, BaseType base, ResGroupCompType comp, const char *prop, const char *strValue); -static bool permListCheck(const PermList *permlist, Oid group, bool report); -static bool checkPermission(Oid group, bool report); -static bool checkCpuSetPermission(Oid group, bool report); -static void checkCompHierarchy(); -static void getMemoryInfo(unsigned long *ram, unsigned long *swap); -static void getCgMemoryInfo(uint64 *cgram, uint64 *cgmemsw); -static int getOvercommitRatio(void); -static bool detectCgroupMountPoint(void); -static void initCpu(void); -static void initCpuSet(void); -static void createDefaultCpuSetGroup(void); -static int64 getCfsPeriodUs(ResGroupCompType); - -/* - * currentGroupIdInCGroup & oldCaps are used for reducing redundant - * file operations - */ -static Oid currentGroupIdInCGroup = InvalidOid; -static ResGroupCaps oldCaps; - -static char cgdir[MAXPATHLEN]; - -static int64 system_cfs_quota_us = -1LL; -static int64 parent_cfs_quota_us = -1LL; -static int ncores; - -/* - * These checks should keep in sync with gpMgmt/bin/gpcheckresgroupimpl - */ -static const PermItem perm_items_cpu[] = -{ - { RESGROUP_COMP_TYPE_CPU, "", R_OK | W_OK | X_OK }, - { RESGROUP_COMP_TYPE_CPU, "cgroup.procs", R_OK | W_OK }, - { RESGROUP_COMP_TYPE_CPU, "cpu.cfs_period_us", R_OK | W_OK }, - { RESGROUP_COMP_TYPE_CPU, "cpu.cfs_quota_us", R_OK | W_OK }, - { RESGROUP_COMP_TYPE_CPU, "cpu.shares", R_OK | W_OK }, - { RESGROUP_COMP_TYPE_UNKNOWN, NULL, 0 } -}; -static const PermItem perm_items_cpu_acct[] = -{ - { RESGROUP_COMP_TYPE_CPUACCT, "", R_OK | W_OK | X_OK }, - { RESGROUP_COMP_TYPE_CPUACCT, "cgroup.procs", R_OK | W_OK }, - { RESGROUP_COMP_TYPE_CPUACCT, "cpuacct.usage", R_OK }, - { RESGROUP_COMP_TYPE_CPUACCT, "cpuacct.stat", R_OK }, - { RESGROUP_COMP_TYPE_UNKNOWN, NULL, 0 } -}; -static const PermItem 
perm_items_cpuset[] = -{ - { RESGROUP_COMP_TYPE_CPUSET, "", R_OK | W_OK | X_OK }, - { RESGROUP_COMP_TYPE_CPUSET, "cgroup.procs", R_OK | W_OK }, - { RESGROUP_COMP_TYPE_CPUSET, "cpuset.cpus", R_OK | W_OK }, - { RESGROUP_COMP_TYPE_CPUSET, "cpuset.mems", R_OK | W_OK }, - { RESGROUP_COMP_TYPE_UNKNOWN, NULL, 0 } -}; -static const PermItem perm_items_memory[] = -{ - { RESGROUP_COMP_TYPE_MEMORY, "", R_OK | W_OK | X_OK }, - { RESGROUP_COMP_TYPE_MEMORY, "memory.limit_in_bytes", R_OK | W_OK }, - { RESGROUP_COMP_TYPE_MEMORY, "memory.usage_in_bytes", R_OK }, - { RESGROUP_COMP_TYPE_UNKNOWN, NULL, 0 } -}; -static const PermItem perm_items_swap[] = -{ - { RESGROUP_COMP_TYPE_MEMORY, "", R_OK | W_OK | X_OK }, - { RESGROUP_COMP_TYPE_MEMORY, "memory.memsw.limit_in_bytes", R_OK | W_OK }, - { RESGROUP_COMP_TYPE_MEMORY, "memory.memsw.usage_in_bytes", R_OK }, - { RESGROUP_COMP_TYPE_UNKNOWN, NULL, 0 } -}; - -/* - * just for cpuset check, same as the cpuset Permlist in permlists - */ -static const PermList cpusetPermList = -{ - perm_items_cpuset, - CGROUP_CPUSET_IS_OPTIONAL, - &gp_resource_group_enable_cgroup_cpuset, -}; - -/* - * Permission groups. - */ -static const PermList permlists[] = -{ - /* - * swap permissions are optional. - * - * cgroup/memory/memory.memsw.* is only available if - * - CONFIG_MEMCG_SWAP_ENABLED=on in kernel config, or - * - swapaccount=1 in kernel cmdline. - * - * Without these interfaces the swap usage can not be limited or accounted - * via cgroup. - */ - { perm_items_swap, true, &gp_resource_group_enable_cgroup_swap }, - - /* - * memory permissions can be mandatory or optional depends on the switch. - * - * resgroup memory auditor is introduced in 6.0 devel and backported - * to 5.x branch since 5.6.1. To provide backward compatibilities memory - * permissions are optional on 5.x branch. 
- */ - { perm_items_memory, CGROUP_MEMORY_IS_OPTIONAL, - &gp_resource_group_enable_cgroup_memory }, - - /* cpu/cpuacct permissions are mandatory */ - { perm_items_cpu, false, NULL }, - { perm_items_cpu_acct, false, NULL }, - - /* - * cpuset permissions can be mandatory or optional depends on the switch. - * - * resgroup cpuset is introduced in 6.0 devel and backported - * to 5.x branch since 5.6.1. To provide backward compatibilities cpuset - * permissions are optional on 5.x branch. - */ - { perm_items_cpuset, CGROUP_CPUSET_IS_OPTIONAL, - &gp_resource_group_enable_cgroup_cpuset}, - - { NULL, false, NULL } -}; - -/* - * Comp names. - */ -const char *compnames[RESGROUP_COMP_TYPE_COUNT] = -{ - "cpu", "cpuacct", "memory", "cpuset" -}; - -/* - * Comp dirs. - */ -char compdirs[RESGROUP_COMP_TYPE_COUNT][MAXPATHLEN] = -{ - FALLBACK_COMP_DIR, FALLBACK_COMP_DIR, FALLBACK_COMP_DIR, FALLBACK_COMP_DIR -}; - -/* - * Get the name of comp. - */ -static const char * -compGetName(ResGroupCompType comp) -{ - Assert(comp > RESGROUP_COMP_TYPE_UNKNOWN); - Assert(comp < RESGROUP_COMP_TYPE_COUNT); - - return compnames[comp]; -} - -/* - * Get the comp type from name. - */ -static ResGroupCompType -compByName(const char *name) -{ - ResGroupCompType comp; - - for (comp = 0; comp < RESGROUP_COMP_TYPE_COUNT; comp++) - if (strcmp(name, compGetName(comp)) == 0) - return comp; - - return RESGROUP_COMP_TYPE_UNKNOWN; -} - -/* - * Get the comp dir of comp. - */ -static const char * -compGetDir(ResGroupCompType comp) -{ - Assert(comp > RESGROUP_COMP_TYPE_UNKNOWN); - Assert(comp < RESGROUP_COMP_TYPE_COUNT); - - return compdirs[comp]; -} - -/* - * Set the comp dir of comp. - */ -static void -compSetDir(ResGroupCompType comp, const char *dir) -{ - Assert(comp > RESGROUP_COMP_TYPE_UNKNOWN); - Assert(comp < RESGROUP_COMP_TYPE_COUNT); - Assert(strlen(dir) < MAXPATHLEN); - - strcpy(compdirs[comp], dir); -} - -/* - * Detect gpdb cgroup component dirs. 
- * - * Take cpu for example, by default we expect gpdb dir to locate at - * cgroup/cpu/gpdb. But we'll also check for the cgroup dirs of init process - * (pid 1), e.g. cgroup/cpu/custom, then we'll look for gpdb dir at - * cgroup/cpu/custom/gpdb, if it's found and has good permissions, it can be - * used instead of the default one. - * - * If any of the gpdb cgroup component dir can not be found under init process' - * cgroup dirs or has bad permissions we'll fallback all the gpdb cgroup - * component dirs to the default ones. - * - * NOTE: This auto detection will look for memory & cpuset gpdb dirs even on - * 5X. - */ -static void -detectCompDirs(void) -{ - ResGroupCompType comp; - FILE *f; - char buf[MAXPATHLEN * 2]; - int maskAll = (1 << RESGROUP_COMP_TYPE_COUNT) - 1; - int maskDetected = 0; - - f = fopen("/proc/1/cgroup", "r"); - if (!f) - goto fallback; - - /* - * format: id:comps:path, e.g.: - * - * 10:cpuset:/ - * 4:cpu,cpuacct:/ - * 1:name=systemd:/init.scope - * 0::/init.scope - */ - while (fscanf(f, "%*d:%s", buf) != EOF) - { - ResGroupCompType comps[RESGROUP_COMP_TYPE_COUNT]; - int ncomps = 0; - char *ptr; - char *tmp; - char sep = '\0'; - int i; - - /* buf is stored with "comps:path" */ - - if (buf[0] == ':') - continue; /* ignore empty comp */ - - /* split comps */ - for (ptr = buf; sep != ':'; ptr = tmp) - { - tmp = strpbrk(ptr, ":,="); - - sep = *tmp; - *tmp++ = 0; - - /* for name=comp case there is nothing to do with the name */ - if (sep == '=') - continue; - - comp = compByName(ptr); - - if (comp == RESGROUP_COMP_TYPE_UNKNOWN) - continue; /* not used by us */ - - /* - * push the comp to the comps stack, but if the stack is already - * full (which is unlikely to happen in real world), simply ignore - * it. 
- */ - if (ncomps < RESGROUP_COMP_TYPE_COUNT) - comps[ncomps++] = comp; - } - - /* now ptr point to the path */ - Assert(strlen(ptr) < MAXPATHLEN); - - /* if the path is "/" then use empty string "" instead of it */ - if (strcmp(ptr, "/") == 0) - ptr[0] = '\0'; - - /* validate and set path for the comps */ - for (i = 0; i < ncomps; i++) - { - comp = comps[i]; - compSetDir(comp, ptr); - - if (!validateCompDir(comp)) - goto fallback; /* dir missing or bad permissions */ - - if (maskDetected & (1 << comp)) - goto fallback; /* comp are detected more than once */ - - maskDetected |= 1 << comp; - } - } - - if (maskDetected != maskAll) - goto fallback; /* not all the comps are detected */ - - /* - * Dump the comp dirs for debugging? No! - * This function is executed before timezone initialization, logs are - * forbidden. - */ - - fclose(f); - return; - -fallback: - /* set the fallback dirs for all the comps */ - foreach_comp_type(comp) - { - compSetDir(comp, FALLBACK_COMP_DIR); - } - - fclose(f); -} - -/* - * Validate a comp dir. - * - * Return True if it exists and has good permissions, - * return False otherwise. - */ -static bool -validateCompDir(ResGroupCompType comp) -{ - char path[MAXPATHLEN]; - size_t pathsize = sizeof(path); - - if (!buildPathSafe(RESGROUP_ROOT_ID, BASETYPE_GPDB, comp, "", - path, pathsize)) - return false; - - return access(path, R_OK | W_OK | X_OK) == 0; -} - -/* - * Dump comp dirs. - */ -static void -dumpCompDirs(void) -{ - ResGroupCompType comp; - char path[MAXPATHLEN]; - size_t pathsize = sizeof(path); - - foreach_comp_type(comp) - { - buildPath(RESGROUP_ROOT_ID, BASETYPE_GPDB, comp, "", path, pathsize); - - elog(LOG, "gpdb dir for cgroup component \"%s\": %s", - compGetName(comp), path); - } -} - -/* - * Build path string with parameters. - * - * Will raise an exception if the path buffer is not large enough. - * - * Refer to buildPathSafe() for details. 
- */ -static char * -buildPath(Oid group, - BaseType base, - ResGroupCompType comp, - const char *prop, - char *path, - size_t pathsize) -{ - char *result = buildPathSafe(group, base, comp, prop, path, pathsize); - - if (!result) - { - CGROUP_CONFIG_ERROR("invalid %s name '%s': %m", - prop[0] ? "file" : "directory", - path); - } - - return result; -} - -/* - * Build path string with parameters. - * - * Return NULL if the path buffer is not large enough, errno will also be set. - * - * Examples (path and pathsize are omitted): - * - buildPath(ROOT, PARENT, CPU, "" ): /sys/fs/cgroup/cpu - * - buildPath(ROOT, PARENT, CPU, "tasks"): /sys/fs/cgroup/cpu/tasks - * - buildPath(ROOT, GPDB , CPU, "tasks"): /sys/fs/cgroup/cpu/gpdb/tasks - * - buildPath(6437, GPDB , CPU, "tasks"): /sys/fs/cgroup/cpu/gpdb/6437/tasks - */ -static char * -buildPathSafe(Oid group, - BaseType base, - ResGroupCompType comp, - const char *prop, - char *path, - size_t pathsize) -{ - const char *compname = compGetName(comp); - const char *compdir = compGetDir(comp); - const char *basedir = ""; - char groupdir[MAXPATHLEN] = ""; - int len; - - Assert(cgdir[0] != 0); - Assert(base == BASETYPE_GPDB || - base == BASETYPE_PARENT); - - if (base == BASETYPE_GPDB) - basedir = "/gpdb"; - else - basedir = ""; - - if (group != RESGROUP_ROOT_ID) - { - len = snprintf(groupdir, sizeof(groupdir), "/%u", group); - - /* We are sure groupdir is large enough */ - Assert(len > 0 && - len < sizeof(groupdir)); - } - - len = snprintf(path, pathsize, "%s/%s%s%s%s/%s", - cgdir, compname, compdir, basedir, groupdir, prop); - if (len >= pathsize || len < 0) - { - errno = ENAMETOOLONG; - return NULL; - } - - return path; -} - -/* - * Unassign all the processes from group. - * - * These processes will be moved to the gpdb toplevel cgroup. 
- * - * This function must be called with the gpdb toplevel dir locked, - * fddir is the fd for this lock, on any failure fddir will be closed - * (and unlocked implicitly) then an error is raised. - */ -static void -unassignGroup(Oid group, ResGroupCompType comp, int fddir) -{ - char path[MAXPATHLEN]; - size_t pathsize = sizeof(path); - char *buf; - size_t bufsize; - const size_t bufdeltasize = 512; - size_t buflen = -1; - int fdr = -1; - int fdw = -1; - - /* - * Check an operation result on path. - * - * Operation can be open(), close(), read(), write(), etc., which must - * set the errno on error. - * - * - condition describes the expected result of the operation; - * - action is the cleanup action on failure, such as closing the fd, - * multiple actions can be specified by putting them in brackets, - * such as (op1, op2); - * - message describes what's failed; - */ -#define __CHECK(condition, action, message) do { \ - if (!(condition)) \ - { \ - /* save errno in case it's changed in actions */ \ - int err = errno; \ - action; \ - CGROUP_ERROR(message ": %s: %s", path, strerror(err)); \ - } \ -} while (0) - - buildPath(group, BASETYPE_GPDB, comp, "cgroup.procs", path, pathsize); - - fdr = open(path, O_RDONLY); - __CHECK(fdr >= 0, ( close(fddir) ), "can't open file for read"); - - buflen = 0; - bufsize = bufdeltasize; - buf = palloc(bufsize); - - while (1) - { - int n = read(fdr, buf + buflen, bufdeltasize); - __CHECK(n >= 0, ( close(fdr), close(fddir) ), "can't read from file"); - - buflen += n; - - if (n < bufdeltasize) - break; - - bufsize += bufdeltasize; - buf = repalloc(buf, bufsize); - } - - close(fdr); - if (buflen == 0) - return; - - buildPath(RESGROUP_ROOT_ID, BASETYPE_GPDB, comp, "cgroup.procs", - path, pathsize); - - fdw = open(path, O_WRONLY); - __CHECK(fdw >= 0, ( close(fddir) ), "can't open file for write"); - - char *ptr = buf; - char *end = NULL; - long pid; - - /* - * as required by cgroup, only one pid can be migrated in each single - * write() 
call, so we have to parse the pids from the buffer first, - * then write them one by one. - */ - while (1) - { - pid = strtol(ptr, &end, 10); - __CHECK(pid != LONG_MIN && pid != LONG_MAX, - ( close(fdw), close(fddir) ), - "can't parse pid"); - - if (ptr == end) - break; - - char str[22]; - sprintf(str, "%ld", pid); - int n = write(fdw, str, strlen(str)); - if (n < 0) - { - elog(LOG, "failed to migrate pid to gpdb root cgroup: pid=%ld: %m", - pid); - } - else - { - __CHECK(n == strlen(str), - ( close(fdw), close(fddir) ), - "can't write to file"); - } - - ptr = end; - } - - close(fdw); - -#undef __CHECK -} - -/* - * Lock the dir specified by path. - * - * - path must be a dir path; - * - if block is true then lock in block mode, otherwise will give up if - * the dir is already locked; - */ -static int -lockDir(const char *path, bool block) -{ - int fddir; - - fddir = open(path, O_RDONLY); - if (fddir < 0) - { - if (errno == ENOENT) - { - /* the dir doesn't exist, nothing to do */ - return -1; - } - - CGROUP_ERROR("can't open dir to lock: %s: %m", path); - } - - int flags = LOCK_EX; - if (!block) - flags |= LOCK_NB; - - while (flock(fddir, flags)) - { - /* - * EAGAIN is not described in flock(2), - * however it does appear in practice. - */ - if (errno == EAGAIN) - continue; - - int err = errno; - close(fddir); - - /* - * In block mode all errors should be reported; - * In non block mode only report errors != EWOULDBLOCK. 
- */ - if (block || err != EWOULDBLOCK) - CGROUP_ERROR("can't lock dir: %s: %s", path, strerror(err)); - return -1; - } - - /* - * Even if we accquired the lock the dir may still been removed by other - * processes, e.g.: - * - * 1: open() - * 1: flock() -- process 1 accquired the lock - * - * 2: open() - * 2: flock() -- blocked by process 1 - * - * 1: rmdir() - * 1: close() -- process 1 released the lock - * - * 2:flock() will now return w/o error as process 2 still has a valid - * fd (reference) on the target dir, and process 2 does accquired the lock - * successfully. However as the dir is already removed so process 2 - * shouldn't make any further operation (rmdir(), etc.) on the dir. - * - * So we check for the existence of the dir again and give up if it's - * already removed. - */ - if (access(path, F_OK)) - { - /* the dir is already removed by other process, nothing to do */ - close(fddir); - return -1; - } - - return fddir; -} - -/* - * Create the cgroup dir for group. - */ -static bool -createDir(Oid group, ResGroupCompType comp) -{ - char path[MAXPATHLEN]; - size_t pathsize = sizeof(path); - - buildPath(group, BASETYPE_GPDB, comp, "", path, pathsize); - - if (mkdir(path, 0755) && errno != EEXIST) - return false; - - return true; -} - -/* - * Remove the cgroup dir for group. - * - * - if unassign is true then unassign all the processes first before removal; - */ -static bool -removeDir(Oid group, ResGroupCompType comp, const char *prop, bool unassign) -{ - char path[MAXPATHLEN]; - size_t pathsize = sizeof(path); - int retry = unassign ? 0 : MAX_RETRY - 1; - int fddir; - - buildPath(group, BASETYPE_GPDB, comp, "", path, pathsize); - - /* - * To prevent race condition between multiple processes we require a dir - * to be removed with the lock accquired first. 
- */ - fddir = lockDir(path, true); - if (fddir < 0) - { - /* the dir is already removed */ - return true; - } - - /* - * Reset the corresponding control file to zero - */ - if (prop) - writeInt64(group, BASETYPE_GPDB, comp, prop, 0); - - while (++retry <= MAX_RETRY) - { - if (unassign) - unassignGroup(group, comp, fddir); - - if (rmdir(path)) - { - int err = errno; - - if (err == EBUSY && unassign && retry < MAX_RETRY) - { - elog(DEBUG1, "can't remove dir, will retry: %s: %s", - path, strerror(err)); - pg_usleep(1000); - continue; - } - - /* - * we don't check for ENOENT again as we already accquired the lock - * on this dir and the dir still exist at that time, so if then - * it's removed by other processes then it's a bug. - */ - elog(DEBUG1, "can't remove dir, ignore the error: %s: %s", - path, strerror(err)); - } - break; - } - - if (retry <= MAX_RETRY) - elog(DEBUG1, "cgroup dir '%s' removed", path); - - /* close() also releases the lock */ - close(fddir); - - return true; -} - -/* - * Get the cpu cores assigned for current system or container. - * - * Suppose a physical machine has 8 cpu cores, 2 of them assigned to - * a container, then the return value is: - * - 8 if running directly on the machine; - * - 2 if running in the container; - */ -static int -getCpuCores(void) -{ - int cpucores = 0; - - /* - * cpuset ops requires _GNU_SOURCE to be defined, - * and _GNU_SOURCE is forced on in src/template/linux, - * so we assume these ops are always available on linux. - */ - cpu_set_t cpuset; - int i; - - if (sched_getaffinity(0, sizeof(cpuset), &cpuset) < 0) - CGROUP_ERROR("can't get cpu cores: %m"); - - for (i = 0; i < CPU_SETSIZE; i++) - { - if (CPU_ISSET(i, &cpuset)) - cpucores++; - } - - if (cpucores == 0) - CGROUP_ERROR("can't get cpu cores"); - - return cpucores; -} - -/* - * Read at most datasize bytes from a file. 
- */ -static size_t -readData(const char *path, char *data, size_t datasize) -{ - int fd = open(path, O_RDONLY); - if (fd < 0) - elog(ERROR, "can't open file '%s': %m", path); - - ssize_t ret = read(fd, data, datasize); - - /* save errno before close() */ - int err = errno; - close(fd); - - if (ret < 0) - elog(ERROR, "can't read data from file '%s': %s", path, strerror(err)); - - return ret; -} - -/* - * Write datasize bytes to a file. - */ -static void -writeData(const char *path, const char *data, size_t datasize) -{ - int fd = open(path, O_WRONLY); - if (fd < 0) - elog(ERROR, "can't open file '%s': %m", path); - - ssize_t ret = write(fd, data, datasize); - - /* save errno before close */ - int err = errno; - close(fd); - - if (ret < 0) - elog(ERROR, "can't write data to file '%s': %s", path, strerror(err)); - if (ret != datasize) - elog(ERROR, "can't write all data to file '%s'", path); -} - -/* - * Read an int64 value from a cgroup interface file. - */ -static int64 -readInt64(Oid group, BaseType base, ResGroupCompType comp, const char *prop) -{ - int64 x; - char data[MAX_INT_STRING_LEN]; - size_t datasize = sizeof(data); - char path[MAXPATHLEN]; - size_t pathsize = sizeof(path); - - buildPath(group, base, comp, prop, path, pathsize); - - readData(path, data, datasize); - - if (sscanf(data, "%lld", (long long *) &x) != 1) - CGROUP_ERROR("invalid number '%s'", data); - - return x; -} - -/* - * Write an int64 value to a cgroup interface file. - */ -static void -writeInt64(Oid group, BaseType base, - ResGroupCompType comp, const char *prop, int64 x) -{ - char data[MAX_INT_STRING_LEN]; - size_t datasize = sizeof(data); - char path[MAXPATHLEN]; - size_t pathsize = sizeof(path); - - buildPath(group, base, comp, prop, path, pathsize); - snprintf(data, datasize, "%lld", (long long) x); - - writeData(path, data, strlen(data)); -} - -/* - * Read a string value from a cgroup interface file. 
- */ -static void -readStr(Oid group, BaseType base, - ResGroupCompType comp, const char *prop, char *str, int len) -{ - char data[MAX_INT_STRING_LEN]; - size_t datasize = sizeof(data); - char path[MAXPATHLEN]; - size_t pathsize = sizeof(path); - - buildPath(group, base, comp, prop, path, pathsize); - - readData(path, data, datasize); - - strlcpy(str, data, len); -} - -/* - * Write an string value to a cgroup interface file. - */ -static void -writeStr(Oid group, BaseType base, - ResGroupCompType comp, const char *prop, const char *strValue) -{ - char path[MAXPATHLEN]; - size_t pathsize = sizeof(path); - - buildPath(group, base, comp, prop, path, pathsize); - writeData(path, strValue, strlen(strValue)); -} - -/* - * Check a list of permissions on group. - * - * - if all the permissions are met then return true; - * - otherwise: - * - raise an error if report is true and permlist is not optional; - * - or return false; - */ -static bool -permListCheck(const PermList *permlist, Oid group, bool report) -{ - char path[MAXPATHLEN]; - size_t pathsize = sizeof(path); - int i; - - if (group == RESGROUP_ROOT_ID && permlist->presult) - *permlist->presult = false; - - foreach_perm_item(i, permlist->items) - { - ResGroupCompType comp = permlist->items[i].comp; - const char *prop = permlist->items[i].prop; - int perm = permlist->items[i].perm; - - if (!buildPathSafe(group, BASETYPE_GPDB, comp, prop, path, pathsize)) - { - /* Buffer is not large enough for the path */ - - if (report && !permlist->optional) - { - CGROUP_CONFIG_ERROR("invalid %s name '%s': %m", - prop[0] ? "file" : "directory", - path); - } - return false; - } - - if (access(path, perm)) - { - /* No such file or directory / Permission denied */ - - if (report && !permlist->optional) - { - CGROUP_CONFIG_ERROR("can't access %s '%s': %m", - prop[0] ? 
"file" : "directory", - path); - } - return false; - } - } - - if (group == RESGROUP_ROOT_ID && permlist->presult) - *permlist->presult = true; - - return true; -} - -/* - * Check permissions on group's cgroup dir & interface files. - * - * - if report is true then raise an error if any mandatory permission - * is not met; - * - otherwise only return false; - */ -static bool -checkPermission(Oid group, bool report) -{ - int i; - - foreach_perm_list(i, permlists) - { - const PermList *permlist = &permlists[i]; - - if (!permListCheck(permlist, group, report) && !permlist->optional) - return false; - } - - return true; -} - -/* - * Same as checkPermission, just check cpuset dir & interface files - * - */ -static bool -checkCpuSetPermission(Oid group, bool report) -{ - if (!gp_resource_group_enable_cgroup_cpuset) - return true; - - if (!permListCheck(&cpusetPermList, group, report) && - !cpusetPermList.optional) - return false; - - return true; -} - -/* - * Check the mount hierarchy of cpu and cpuset subsystem. - * - * Raise an error if cpu and cpuset are mounted on the same hierarchy. 
- */ -static void -checkCompHierarchy() -{ - ResGroupCompType comp; - FILE *f; - char buf[MAXPATHLEN * 2]; - - f = fopen("/proc/1/cgroup", "r"); - if (!f) - { - CGROUP_CONFIG_ERROR("can't check component mount hierarchy \ - file '/proc/1/cgroup' doesn't exist"); - return; - } - - /* - * format: id:comps:path, e.g.: - * - * 10:cpuset:/ - * 4:cpu,cpuacct:/ - * 1:name=systemd:/init.scope - * 0::/init.scope - */ - while (fscanf(f, "%*d:%s", buf) != EOF) - { - char *ptr; - char *tmp; - char sep = '\0'; - /* mark if the line has alread contained cpu or cpuset comp */ - int markComp = RESGROUP_COMP_TYPE_UNKNOWN; - - /* buf is stored with "comps:path" */ - if (buf[0] == ':') - continue; /* ignore empty comp */ - - /* split comps */ - for (ptr = buf; sep != ':'; ptr = tmp) - { - tmp = strpbrk(ptr, ":,="); - - sep = *tmp; - *tmp++ = 0; - - /* for name=comp case there is nothing to do with the name */ - if (sep == '=') - continue; - - comp = compByName(ptr); - - if (comp == RESGROUP_COMP_TYPE_UNKNOWN) - continue; /* not used by us */ - - if (comp == RESGROUP_COMP_TYPE_CPU || comp == RESGROUP_COMP_TYPE_CPUSET) - { - if (markComp == RESGROUP_COMP_TYPE_UNKNOWN) - markComp = comp; - else - { - Assert(markComp != comp); - fclose(f); - CGROUP_CONFIG_ERROR("can't mount 'cpu' and 'cpuset' on the same hierarchy"); - return; - } - } - } - } - - fclose(f); -} - -/* get total ram and total swap (in Byte) from sysinfo */ -static void -getMemoryInfo(unsigned long *ram, unsigned long *swap) -{ - struct sysinfo info; - if (sysinfo(&info) < 0) - elog(ERROR, "can't get memory information: %m"); - *ram = info.totalram; - *swap = info.totalswap; -} - -/* get cgroup ram and swap (in Byte) */ -static void -getCgMemoryInfo(uint64 *cgram, uint64 *cgmemsw) -{ - ResGroupCompType comp = RESGROUP_COMP_TYPE_MEMORY; - - *cgram = readInt64(RESGROUP_ROOT_ID, BASETYPE_PARENT, - comp, "memory.limit_in_bytes"); - - if (gp_resource_group_enable_cgroup_swap) - { - *cgmemsw = readInt64(RESGROUP_ROOT_ID, 
BASETYPE_PARENT, - comp, "memory.memsw.limit_in_bytes"); - } - else - { - elog(DEBUG1, "swap memory is unlimited"); - *cgmemsw = (uint64) -1LL; - } -} - -/* get vm.overcommit_ratio */ -static int -getOvercommitRatio(void) -{ - int ratio; - char data[MAX_INT_STRING_LEN]; - size_t datasize = sizeof(data); - const char *path = "/proc/sys/vm/overcommit_ratio"; - - readData(path, data, datasize); - - if (sscanf(data, "%d", &ratio) != 1) - elog(ERROR, "invalid number '%s' in '%s'", data, path); - - return ratio; -} - -/* detect cgroup mount point */ -static bool -detectCgroupMountPoint(void) -{ - struct mntent *me; - FILE *fp; - - if (cgdir[0]) - return true; - - fp = setmntent(PROC_MOUNTS, "r"); - if (fp == NULL) - CGROUP_CONFIG_ERROR("can not open '%s' for read", PROC_MOUNTS); - - - while ((me = getmntent(fp))) - { - char * p; - - if (strcmp(me->mnt_type, "cgroup")) - continue; - - strncpy(cgdir, me->mnt_dir, sizeof(cgdir) - 1); - - p = strrchr(cgdir, '/'); - if (p == NULL) - CGROUP_CONFIG_ERROR("cgroup mount point parse error: %s", cgdir); - else - *p = 0; - break; - } - - endmntent(fp); - - return !!cgdir[0]; -} - -/* - * Init gpdb cpu settings. - * - * Must be called after Probe() and Bless(). - */ -static void -initCpu(void) -{ - ResGroupCompType comp = RESGROUP_COMP_TYPE_CPU; - int64 cfs_quota_us; - int64 shares; - - /* - * CGroup promises that cfs_quota_us will never be 0, however on centos6 - * we ever noticed that it has the value 0. - */ - if (parent_cfs_quota_us <= 0LL) - { - /* - * parent cgroup is unlimited, calculate gpdb's limitation based on - * system hardware configuration. - * - * cfs_quota_us := parent.cfs_period_us * ncores * gp_resource_group_cpu_limit - */ - cfs_quota_us = system_cfs_quota_us * gp_resource_group_cpu_limit; - } - else - { - /* - * parent cgroup is also limited, then calculate gpdb's limitation - * based on it. 
- * - * cfs_quota_us := parent.cfs_quota_us * gp_resource_group_cpu_limit - */ - cfs_quota_us = parent_cfs_quota_us * gp_resource_group_cpu_limit; - } - - writeInt64(RESGROUP_ROOT_ID, BASETYPE_GPDB, - comp, "cpu.cfs_quota_us", cfs_quota_us); - - /* - * shares := parent.shares * gp_resource_group_cpu_priority - * - * We used to set a large shares (like 1024 * 256, the maximum possible - * value), it has very bad effect on overall system performance, - * especially on 1-core or 2-core low-end systems. - * Processes in a cold cgroup get launched and scheduled with large - * latency (a simple `cat a.txt` may executes for more than 100s). - * Here a cold cgroup is a cgroup that doesn't have active running - * processes, this includes not only the toplevel system cgroup, - * but also the inactive gpdb resgroups. - */ - shares = readInt64(RESGROUP_ROOT_ID, BASETYPE_PARENT, comp, "cpu.shares"); - shares = shares * gp_resource_group_cpu_priority; - - writeInt64(RESGROUP_ROOT_ID, BASETYPE_GPDB, - comp, "cpu.shares", shares); -} - -/* - * Init gpdb cpuset settings. - * - * Must be called after Probe() and Bless(). 
- */ -static void -initCpuSet(void) -{ - ResGroupCompType comp = RESGROUP_COMP_TYPE_CPUSET; - char buffer[MaxCpuSetLength]; - - if (!gp_resource_group_enable_cgroup_cpuset) - return; - - /* - * Get cpuset.mems and cpuset.cpus values from cgroup cpuset root path, - * and set them to cpuset/gpdb/cpuset.mems and cpuset/gpdb/cpuset.cpus - * to make sure that gpdb directory configuration is same as its - * parent directory - */ - - readStr(RESGROUP_ROOT_ID, BASETYPE_PARENT, comp, "cpuset.mems", - buffer, sizeof(buffer)); - writeStr(RESGROUP_ROOT_ID, BASETYPE_GPDB, comp, "cpuset.mems", buffer); - - readStr(RESGROUP_ROOT_ID, BASETYPE_PARENT, comp, "cpuset.cpus", - buffer, sizeof(buffer)); - writeStr(RESGROUP_ROOT_ID, BASETYPE_GPDB, comp, "cpuset.cpus", buffer); - - createDefaultCpuSetGroup(); -} - -static int64 -getCfsPeriodUs(ResGroupCompType comp) -{ - int64 cfs_period_us; - - /* - * calculate cpu rate limit of system. - * - * Ideally the cpu quota is calculated from parent information: - * - * system_cfs_quota_us := parent.cfs_period_us * ncores. - * - * However on centos6 we found parent.cfs_period_us can be 0 and is not - * writable. In the other side, gpdb.cfs_period_us should be equal to - * parent.cfs_period_us because sub dirs inherit parent properties by - * default, so we read it instead. - */ - cfs_period_us = readInt64(RESGROUP_ROOT_ID, BASETYPE_GPDB, - comp, "cpu.cfs_period_us"); - if (cfs_period_us == 0LL) - { - /* - * if gpdb.cfs_period_us is also 0 try to correct it by setting the - * default value 100000 (100ms). 
- */ - writeInt64(RESGROUP_ROOT_ID, BASETYPE_GPDB, - comp, "cpu.cfs_period_us", 100000LL); - - /* read again to verify the effect */ - cfs_period_us = readInt64(RESGROUP_ROOT_ID, BASETYPE_GPDB, - comp, "cpu.cfs_period_us"); - if (cfs_period_us <= 0LL) - CGROUP_CONFIG_ERROR("invalid cpu.cfs_period_us value: " - INT64_FORMAT, - cfs_period_us); - } - return cfs_period_us; -} - -/* Return the name for the OS group implementation */ -const char * -ResGroupOps_Name(void) -{ - return "cgroup"; -} - -/* - * Probe the configuration for the OS group implementation. - * - * Return true if everything is OK, or false is some requirements are not - * satisfied. Will not fail in either case. - */ -bool -ResGroupOps_Probe(void) -{ - /* - * We only have to do these checks and initialization once on each host, - * so only let postmaster do the job. - */ - if (IsUnderPostmaster) - return true; - - /* - * Ignore the error even if cgroup mount point can not be successfully - * probed, the error will be reported in Bless() later. - */ - if (!detectCgroupMountPoint()) - return false; - - detectCompDirs(); - - /* - * Probe for optional features like the 'cgroup' memory auditor, - * do not raise any errors. - */ - if (!checkPermission(RESGROUP_ROOT_ID, false)) - return false; - - return true; -} - -/* Check whether the OS group implementation is available and useable */ -void -ResGroupOps_Bless(void) -{ - ResGroupCompType comp = RESGROUP_COMP_TYPE_CPU; - int64 cfs_period_us; - - /* - * We only have to do these checks and initialization once on each host, - * so only let postmaster do the job. - */ - if (IsUnderPostmaster) - return; - - /* - * We should have already detected for cgroup mount point in Probe(), - * it was not an error if the detection failed at that step. But once - * we call Bless() we know we want to make use of cgroup then we must - * know the mount point, otherwise it's a critical error. 
- */ - if (!cgdir[0]) - CGROUP_CONFIG_ERROR("can not find cgroup mount point"); - - /* - * Check again, this time we will fail on unmet requirements. - */ - checkPermission(RESGROUP_ROOT_ID, true); - - /* - * Check if cpu and cpuset subsystems are mounted on the same hierarchy. - * We do not allow they mount on the same hierarchy, because writting pid - * to DEFAULT_CPUSET_GROUP_ID in ResGroupOps_AssignGroup will cause the - * removal of the pid in group BASETYPE_GPDB, which will make cpu usage - * out of control. - */ - if (!CGROUP_CPUSET_IS_OPTIONAL) - checkCompHierarchy(); - - /* - * Dump the cgroup comp dirs to logs. - * Check detectCompDirs() to know why this is not done in that function. - */ - dumpCompDirs(); - - /* - * Get some necessary system information. - * We can not do them in Probe() as failure is not allowed in that one. - */ - - /* get system cpu cores */ - ncores = getCpuCores(); - - cfs_period_us = getCfsPeriodUs(comp); - system_cfs_quota_us = cfs_period_us * ncores; - - /* read cpu rate limit of parent cgroup */ - parent_cfs_quota_us = readInt64(RESGROUP_ROOT_ID, BASETYPE_PARENT, - comp, "cpu.cfs_quota_us"); -} - -/* Initialize the OS group */ -void -ResGroupOps_Init(void) -{ - initCpu(); - initCpuSet(); - - /* - * Create the auxiliary process cgroup, and put postmaster and all the - * children processes into the group. - */ - ResGroupOps_CreateGroup(RESGROUP_AUXILIARY_PROCESS_GROUP_ID); - ResGroupOps_AssignGroup(RESGROUP_AUXILIARY_PROCESS_GROUP_ID, NULL, PostmasterPid); -} - -/* Adjust GUCs for this OS group implementation */ -void -ResGroupOps_AdjustGUCs(void) -{ - /* - * cgroup cpu limitation works best when all processes have equal - * priorities, so we force all the segments and postmaster to - * work with nice=0. - * - * this function should be called before GUCs are dispatched to segments. - */ - gp_segworker_relative_priority = 0; -} - -/* - * Create the OS group for group. 
- */ -void -ResGroupOps_CreateGroup(Oid group) -{ - int retry = 0; - - if (!createDir(group, RESGROUP_COMP_TYPE_CPU) || - !createDir(group, RESGROUP_COMP_TYPE_CPUACCT) || - (gp_resource_group_enable_cgroup_cpuset && - !createDir(group, RESGROUP_COMP_TYPE_CPUSET)) || - (gp_resource_group_enable_cgroup_memory && - !createDir(group, RESGROUP_COMP_TYPE_MEMORY))) - { - CGROUP_ERROR("can't create cgroup for resgroup '%d': %m", group); - } - - /* - * although the group dir is created the interface files may not be - * created yet, so we check them repeatedly until everything is ready. - */ - while (++retry <= MAX_RETRY && !checkPermission(group, false)) - pg_usleep(1000); - - if (retry > MAX_RETRY) - { - /* - * still not ready after MAX_RETRY retries, might be a real error, - * raise the error. - */ - checkPermission(group, true); - } - - if (gp_resource_group_enable_cgroup_cpuset) - { - /* - * Initialize cpuset.mems and cpuset.cpus values as its parent directory - */ - ResGroupCompType comp = RESGROUP_COMP_TYPE_CPUSET; - char buffer[MaxCpuSetLength]; - - readStr(RESGROUP_ROOT_ID, BASETYPE_GPDB, comp, "cpuset.mems", - buffer, sizeof(buffer)); - writeStr(group, BASETYPE_GPDB, comp, "cpuset.mems", buffer); - - readStr(RESGROUP_ROOT_ID, BASETYPE_GPDB, comp, "cpuset.cpus", - buffer, sizeof(buffer)); - writeStr(group, BASETYPE_GPDB, comp, "cpuset.cpus", buffer); - } -} - -/* - * Create the OS group for default cpuset group. - * default cpuset group is a special group, only take effect in cpuset - */ -static void -createDefaultCpuSetGroup(void) -{ - ResGroupCompType comp = RESGROUP_COMP_TYPE_CPUSET; - int retry = 0; - - if (!createDir(DEFAULT_CPUSET_GROUP_ID, comp)) - { - CGROUP_ERROR("can't create cpuset cgroup for resgroup '%d': %m", - DEFAULT_CPUSET_GROUP_ID); - } - - /* - * although the group dir is created the interface files may not be - * created yet, so we check them repeatedly until everything is ready. 
- */ - while (++retry <= MAX_RETRY && - !checkCpuSetPermission(DEFAULT_CPUSET_GROUP_ID, false)) - pg_usleep(1000); - - if (retry > MAX_RETRY) - { - /* - * still not ready after MAX_RETRY retries, might be a real error, - * raise the error. - */ - checkCpuSetPermission(DEFAULT_CPUSET_GROUP_ID, true); - } - - /* - * Initialize cpuset.mems and cpuset.cpus in default group as its - * parent directory - */ - char buffer[MaxCpuSetLength]; - - readStr(RESGROUP_ROOT_ID, BASETYPE_GPDB, comp, "cpuset.mems", - buffer, sizeof(buffer)); - writeStr(DEFAULT_CPUSET_GROUP_ID, BASETYPE_GPDB, comp, "cpuset.mems", buffer); - - readStr(RESGROUP_ROOT_ID, BASETYPE_GPDB, comp, "cpuset.cpus", - buffer, sizeof(buffer)); - writeStr(DEFAULT_CPUSET_GROUP_ID, BASETYPE_GPDB, comp, "cpuset.cpus", buffer); -} - -/* - * Destroy the OS group for group. - * - * One OS group can not be dropped if there are processes running under it, - * if migrate is true these processes will be moved out automatically. - */ -void -ResGroupOps_DestroyGroup(Oid group, bool migrate) -{ - if (!removeDir(group, RESGROUP_COMP_TYPE_CPU, "cpu.shares", migrate) || - !removeDir(group, RESGROUP_COMP_TYPE_CPUACCT, NULL, migrate) || - (gp_resource_group_enable_cgroup_cpuset && - !removeDir(group, RESGROUP_COMP_TYPE_CPUSET, NULL, migrate)) || - (gp_resource_group_enable_cgroup_memory && - !removeDir(group, RESGROUP_COMP_TYPE_MEMORY, "memory.limit_in_bytes", migrate))) - { - CGROUP_ERROR("can't remove cgroup for resgroup '%d': %m", group); - } -} - -/* - * Assign a process to the OS group. A process can only be assigned to one - * OS group, if it's already running under other OS group then it'll be moved - * out that OS group. - * - * pid is the process id. - */ -void -ResGroupOps_AssignGroup(Oid group, ResGroupCaps *caps, int pid) -{ - bool oldViaCpuset = oldCaps.cpuRateLimit == CPU_RATE_LIMIT_DISABLED; - bool curViaCpuset = caps ? 
caps->cpuRateLimit == CPU_RATE_LIMIT_DISABLED : false; - - /* needn't write to file if the pid has already been written in. - * Unless it has not been writtien or the group has changed or - * cpu control mechanism has changed */ - if (IsUnderPostmaster && - group == currentGroupIdInCGroup && - caps != NULL && - oldViaCpuset == curViaCpuset) - return; - - writeInt64(group, BASETYPE_GPDB, RESGROUP_COMP_TYPE_CPU, - "cgroup.procs", pid); - writeInt64(group, BASETYPE_GPDB, RESGROUP_COMP_TYPE_CPUACCT, - "cgroup.procs", pid); - - if (gp_resource_group_enable_cgroup_cpuset) - { - if (caps == NULL || !curViaCpuset) - { - /* add pid to default group */ - writeInt64(DEFAULT_CPUSET_GROUP_ID, BASETYPE_GPDB, - RESGROUP_COMP_TYPE_CPUSET, "cgroup.procs", pid); - } - else - { - writeInt64(group, BASETYPE_GPDB, - RESGROUP_COMP_TYPE_CPUSET, "cgroup.procs", pid); - } - } - - /* - * Do not assign the process to cgroup/memory for now. - */ - - currentGroupIdInCGroup = group; - if (caps != NULL) - { - oldCaps.cpuRateLimit = caps->cpuRateLimit; - strlcpy(oldCaps.cpuset, caps->cpuset, sizeof(oldCaps.cpuset)); - } -} - -/* - * Lock the OS group. While the group is locked it won't be removed by other - * processes. - * - * This function would block if block is true, otherwise it return with -1 - * immediately. - * - * On success it return a fd to the OS group, pass it to - * ResGroupOps_UnLockGroup() to unlock it. - */ -int -ResGroupOps_LockGroup(Oid group, ResGroupCompType comp, bool block) -{ - char path[MAXPATHLEN]; - size_t pathsize = sizeof(path); - - buildPath(group, BASETYPE_GPDB, comp, "", path, pathsize); - - return lockDir(path, block); -} - -/* - * Unblock a OS group. - * - * fd is the value returned by ResGroupOps_LockGroup(). - */ -void -ResGroupOps_UnLockGroup(Oid group, int fd) -{ - if (fd >= 0) - close(fd); -} - -/* - * Set the cpu rate limit for the OS group. - * - * cpu_rate_limit should be within [0, 100]. 
- */ -void -ResGroupOps_SetCpuRateLimit(Oid group, int cpu_rate_limit) -{ - ResGroupCompType comp = RESGROUP_COMP_TYPE_CPU; - - /* group.shares := gpdb.shares * cpu_rate_limit */ - - int64 shares = readInt64(RESGROUP_ROOT_ID, BASETYPE_GPDB, comp, - "cpu.shares"); - writeInt64(group, BASETYPE_GPDB, comp, - "cpu.shares", shares * cpu_rate_limit / 100); - - /* set cpu.cfs_quota_us if hard CPU enforment is enabled */ - if (gp_resource_group_cpu_ceiling_enforcement) - { - int64 periods = getCfsPeriodUs(comp); - writeInt64(group, BASETYPE_GPDB, comp, "cpu.cfs_quota_us", - periods * ResGroupOps_GetCpuCores() * cpu_rate_limit / 100); - } - else - { - writeInt64(group, BASETYPE_GPDB, comp, "cpu.cfs_quota_us", -1); - } -} - -/* - * Set the memory limit for the OS group by rate. - * - * memory_limit should be within [0, 100]. - */ -void -ResGroupOps_SetMemoryLimit(Oid group, int memory_limit) -{ - ResGroupCompType comp = RESGROUP_COMP_TYPE_MEMORY; - int fd; - int32 memory_limit_in_chunks; - - memory_limit_in_chunks = ResGroupGetVmemLimitChunks() * memory_limit / 100; - memory_limit_in_chunks *= ResGroupGetHostPrimaryCount(); - - fd = ResGroupOps_LockGroup(group, comp, true); - ResGroupOps_SetMemoryLimitByValue(group, memory_limit_in_chunks); - ResGroupOps_UnLockGroup(group, fd); -} - -/* - * Set the memory limit for the OS group by value. - * - * memory_limit is the limit value in chunks - * - * If cgroup supports memory swap, we will write the same limit to - * memory.memsw.limit and memory.limit. - */ -void -ResGroupOps_SetMemoryLimitByValue(Oid group, int32 memory_limit) -{ - ResGroupCompType comp = RESGROUP_COMP_TYPE_MEMORY; - int64 memory_limit_in_bytes; - - if (!gp_resource_group_enable_cgroup_memory) - return; - - memory_limit_in_bytes = VmemTracker_ConvertVmemChunksToBytes(memory_limit); - - /* Is swap interfaces enabled? 
*/ - if (!gp_resource_group_enable_cgroup_swap) - { - /* No, then we only need to setup the memory limit */ - writeInt64(group, BASETYPE_GPDB, comp, "memory.limit_in_bytes", - memory_limit_in_bytes); - } - else - { - /* Yes, then we have to setup both the memory and mem+swap limits */ - - int64 memory_limit_in_bytes_old; - - /* - * Memory limit should always <= mem+swap limit, then the limits - * must be set in a proper order depending on the relation between - * new and old limits. - */ - memory_limit_in_bytes_old = readInt64(group, BASETYPE_GPDB, comp, - "memory.limit_in_bytes"); - - if (memory_limit_in_bytes > memory_limit_in_bytes_old) - { - /* When new value > old memory limit, write mem+swap limit first */ - writeInt64(group, BASETYPE_GPDB, comp, - "memory.memsw.limit_in_bytes", memory_limit_in_bytes); - writeInt64(group, BASETYPE_GPDB, comp, - "memory.limit_in_bytes", memory_limit_in_bytes); - } - else if (memory_limit_in_bytes < memory_limit_in_bytes_old) - { - /* When new value < old memory limit, write memory limit first */ - writeInt64(group, BASETYPE_GPDB, comp, - "memory.limit_in_bytes", memory_limit_in_bytes); - writeInt64(group, BASETYPE_GPDB, comp, - "memory.memsw.limit_in_bytes", memory_limit_in_bytes); - } - } -} - -/* - * Get the cpu usage of the OS group, that is the total cpu time obtained - * by this OS group, in nano seconds. - */ -int64 -ResGroupOps_GetCpuUsage(Oid group) -{ - ResGroupCompType comp = RESGROUP_COMP_TYPE_CPUACCT; - - return readInt64(group, BASETYPE_GPDB, comp, "cpuacct.usage"); -} - -/* - * Get the memory usage of the OS group - * - * memory usage is returned in chunks - */ -int32 -ResGroupOps_GetMemoryUsage(Oid group) -{ - ResGroupCompType comp = RESGROUP_COMP_TYPE_MEMORY; - int64 memory_usage_in_bytes; - char *prop; - - /* Report 0 if cgroup memory is not enabled */ - if (!gp_resource_group_enable_cgroup_memory) - return 0; - - prop = gp_resource_group_enable_cgroup_swap - ? 
"memory.memsw.usage_in_bytes" - : "memory.usage_in_bytes"; - - memory_usage_in_bytes = readInt64(group, BASETYPE_GPDB, comp, prop); - - return VmemTracker_ConvertVmemBytesToChunks(memory_usage_in_bytes); -} - -/* - * Get the memory limit of the OS group - * - * memory limit is returned in chunks - */ -int32 -ResGroupOps_GetMemoryLimit(Oid group) -{ - ResGroupCompType comp = RESGROUP_COMP_TYPE_MEMORY; - int64 memory_limit_in_bytes; - - /* Report unlimited (max int32) if cgroup memory is not enabled */ - if (!gp_resource_group_enable_cgroup_memory) - return (int32) ((1U << 31) - 1); - - memory_limit_in_bytes = readInt64(group, BASETYPE_GPDB, - comp, "memory.limit_in_bytes"); - - return VmemTracker_ConvertVmemBytesToChunks(memory_limit_in_bytes); -} - -/* - * Get the count of cpu cores on the system. - */ -int -ResGroupOps_GetCpuCores(void) -{ - return ncores; -} - -/* - * Get the total memory on the system in MB. - * Read from sysinfo and cgroup to get correct ram and swap. - * (total RAM * overcommit_ratio + total Swap) - */ -int -ResGroupOps_GetTotalMemory(void) -{ - unsigned long ram, swap, total; - int overcommitRatio; - uint64 cgram, cgmemsw; - uint64 memsw; - uint64 outTotal; - - overcommitRatio = getOvercommitRatio(); - getMemoryInfo(&ram, &swap); - /* Get sysinfo total ram and swap size. */ - memsw = ram + swap; - outTotal = swap + ram * overcommitRatio / 100; - getCgMemoryInfo(&cgram, &cgmemsw); - ram = Min(ram, cgram); - /* - * In the case that total ram and swap read from sysinfo is larger than - * from cgroup, ram and swap must both be limited, otherwise swap must - * not be limited(we can safely use the value from sysinfo as swap size). - */ - if (cgmemsw < memsw) - swap = cgmemsw - ram; - /* - * If it is in container, the total memory is limited by both the total - * memoery outside and the memsw of the container. - */ - total = Min(outTotal, swap + ram); - return total >> BITS_IN_MB; -} - -/* - * Set the cpuset for the OS group. 
- * @param group: the destination group - * @param cpuset: the value to be set - * The syntax of CPUSET is a combination of the tuples, each tuple represents - * one core number or the core numbers interval, separated by comma. - * E.g. 0,1,2-3. - */ -void -ResGroupOps_SetCpuSet(Oid group, const char *cpuset) -{ - ResGroupCompType comp = RESGROUP_COMP_TYPE_CPUSET; - - if (!gp_resource_group_enable_cgroup_cpuset) - return ; - - writeStr(group, BASETYPE_GPDB, comp, "cpuset.cpus", cpuset); -} - -/* - * Get the cpuset of the OS group. - * @param group: the destination group - * @param cpuset: the str to be set - * @param len: the upper limit of the str - */ -void -ResGroupOps_GetCpuSet(Oid group, char *cpuset, int len) -{ - ResGroupCompType comp = RESGROUP_COMP_TYPE_CPUSET; - - if (!gp_resource_group_enable_cgroup_cpuset) - return ; - - readStr(group, BASETYPE_GPDB, comp, "cpuset.cpus", cpuset, len); -} - -/* - * Convert the cpu usage to percentage within the duration. - * - * usage is the delta of GetCpuUsage() of a duration, - * duration is in micro seconds. - * - * When fully consuming one cpu core the return value will be 100.0 . - */ -float -ResGroupOps_ConvertCpuUsageToPercent(int64 usage, int64 duration) -{ - float percent; - - Assert(usage >= 0LL); - Assert(duration > 0LL); - - /* There should always be at least one core on the system */ - Assert(ncores > 0); - - /* - * Usage is the cpu time (nano seconds) obtained by this group in the time - * duration (micro seconds), so cpu time on one core can be calculated as: - * - * usage / 1000 / duration / ncores - * - * To convert it to percentage we should multiple 100%: - * - * usage / 1000 / duration / ncores * 100% - * = usage / 10 / duration / ncores - */ - percent = usage / 10.0 / duration / ncores; - - /* - * Now we have the system level percentage, however when running in a - * container with limited cpu quota we need to further scale it with - * parent. 
Suppose parent has 50% cpu quota and gpdb is consuming all of - * it, then we want gpdb to report the cpu usage as 100% instead of 50%. - */ - - if (parent_cfs_quota_us > 0LL) - { - /* - * Parent cgroup is also limited, scale the percentage to the one in - * parent cgroup. Do not change the expression to `percent *= ...`, - * that will lose the precision. - */ - percent = percent * system_cfs_quota_us / parent_cfs_quota_us; - } - - return percent; -} diff --git a/src/backend/utils/resgroup/resgroup.c b/src/backend/utils/resgroup/resgroup.c index e806b693d78..7b7a32afa48 100644 --- a/src/backend/utils/resgroup/resgroup.c +++ b/src/backend/utils/resgroup/resgroup.c @@ -69,11 +69,12 @@ #include "utils/builtins.h" #include "utils/memutils.h" #include "utils/ps_status.h" -#include "utils/resgroup-ops.h" +#include "utils/cgroup.h" #include "utils/resgroup.h" #include "utils/resource_manager.h" #include "utils/session_state.h" #include "utils/vmem_tracker.h" +#include "utils/cgroup-ops-v1.h" #define InvalidSlotId (-1) #define RESGROUP_MAX_SLOTS (MaxConnections) @@ -538,6 +539,28 @@ AllocResGroupEntry(Oid groupId, const ResGroupCaps *caps) LWLockRelease(ResGroupLock); } +void +initCgroup(void) +{ +#ifdef __linux__ + if (!gp_resource_group_enable_cgroup_version_two) + { + cgroupOpsRoutine = get_group_routine_alpha(); + cgroupSystemInfo = get_cgroup_sysinfo_alpha(); + } +#else + elog(ERROR, "The resource group is not support on your operating system."); +#endif + + bool probe_result = cgroupOpsRoutine->probecgroup(); + if (!probe_result) + elog(ERROR, "The control group is not well configured, please check your" + "system configuration."); + + cgroupOpsRoutine->checkcgroup(); + cgroupOpsRoutine->initcgroup(); +} + /* * Load the resource groups in shared memory. Note this * can only be done after enough setup has been done. 
This uses @@ -600,7 +623,7 @@ InitResGroups(void) if (gp_resource_group_enable_cgroup_cpuset) { /* Get cpuset from cpuset/gpdb, and transform it into bitset */ - ResGroupOps_GetCpuSet(RESGROUP_ROOT_ID, cpuset, MaxCpuSetLength); + cgroupOpsRoutine->getcpuset(CGROUP_ROOT_ID, cpuset, MaxCpuSetLength); bmsUnused = CpusetToBitset(cpuset, MaxCpuSetLength); /* get the minimum core number, in case of the zero core is not exist */ defaultCore = bms_next_member(bmsUnused, -1); @@ -621,12 +644,12 @@ InitResGroups(void) group = createGroup(groupId, &caps); Assert(group != NULL); - ResGroupOps_CreateGroup(groupId); - ResGroupOps_SetMemoryLimit(groupId, caps.memLimit); + cgroupOpsRoutine->createcgroup(groupId); + cgroupOpsRoutine->setmemorylimit(groupId, caps.memLimit); if (caps.cpuRateLimit != CPU_RATE_LIMIT_DISABLED) { - ResGroupOps_SetCpuRateLimit(groupId, caps.cpuRateLimit); + cgroupOpsRoutine->setcpulimit(groupId, caps.cpuRateLimit); } else { @@ -656,7 +679,7 @@ InitResGroups(void) * write cpus to corresponding file * if all the cores are available */ - ResGroupOps_SetCpuSet(groupId, caps.cpuset); + cgroupOpsRoutine->setcpuset(groupId, caps.cpuset); bmsUnused = bms_del_members(bmsUnused, bmsCurrent); } else @@ -669,7 +692,7 @@ InitResGroups(void) * can startup, then DBA can fix it */ snprintf(cpuset, MaxCpuSetLength, "%d", defaultCore); - ResGroupOps_SetCpuSet(groupId, cpuset); + cgroupOpsRoutine->setcpuset(groupId, cpuset); BitsetToCpuset(bmsMissing, cpusetMissing, MaxCpuSetLength); ereport(WARNING, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), @@ -707,7 +730,7 @@ InitResGroups(void) Assert(cpuset[0]); Assert(!CpusetIsEmpty(cpuset)); - ResGroupOps_SetCpuSet(DEFAULT_CPUSET_GROUP_ID, cpuset); + cgroupOpsRoutine->setcpuset(DEFAULT_CPUSET_GROUP_ID, cpuset); } pResGroupControl->loaded = true; @@ -804,14 +827,14 @@ ResGroupDropFinish(const ResourceGroupCallbackContext *callbackCtx, { /* reset default group, add cpu cores to it */ char cpuset[MaxCpuSetLength]; - 
ResGroupOps_GetCpuSet(DEFAULT_CPUSET_GROUP_ID, + cgroupOpsRoutine->getcpuset(DEFAULT_CPUSET_GROUP_ID, cpuset, MaxCpuSetLength); CpusetUnion(cpuset, group->caps.cpuset, MaxCpuSetLength); - ResGroupOps_SetCpuSet(DEFAULT_CPUSET_GROUP_ID, cpuset); + cgroupOpsRoutine->setcpuset(DEFAULT_CPUSET_GROUP_ID, cpuset); } } - ResGroupOps_DestroyGroup(callbackCtx->groupid, migrate); + cgroupOpsRoutine->destroycgroup(callbackCtx->groupid, migrate); } } PG_CATCH(); @@ -849,20 +872,20 @@ ResGroupCreateOnAbort(const ResourceGroupCallbackContext *callbackCtx) savedInterruptHoldoffCount = InterruptHoldoffCount; removeGroup(callbackCtx->groupid); /* remove the os dependent part for this resource group */ - ResGroupOps_DestroyGroup(callbackCtx->groupid, true); + cgroupOpsRoutine->destroycgroup(callbackCtx->groupid, true); if (!CpusetIsEmpty(callbackCtx->caps.cpuset) && gp_resource_group_enable_cgroup_cpuset) { /* return cpu cores to default group */ char defaultGroupCpuset[MaxCpuSetLength]; - ResGroupOps_GetCpuSet(DEFAULT_CPUSET_GROUP_ID, + cgroupOpsRoutine->getcpuset(DEFAULT_CPUSET_GROUP_ID, defaultGroupCpuset, MaxCpuSetLength); CpusetUnion(defaultGroupCpuset, callbackCtx->caps.cpuset, MaxCpuSetLength); - ResGroupOps_SetCpuSet(DEFAULT_CPUSET_GROUP_ID, defaultGroupCpuset); + cgroupOpsRoutine->setcpuset(DEFAULT_CPUSET_GROUP_ID, defaultGroupCpuset); } } PG_CATCH(); @@ -902,13 +925,13 @@ ResGroupAlterOnCommit(const ResourceGroupCallbackContext *callbackCtx) if (callbackCtx->limittype == RESGROUP_LIMIT_TYPE_CPU) { - ResGroupOps_SetCpuRateLimit(callbackCtx->groupid, + cgroupOpsRoutine->setcpulimit(callbackCtx->groupid, callbackCtx->caps.cpuRateLimit); } else if (callbackCtx->limittype == RESGROUP_LIMIT_TYPE_CPUSET) { if (gp_resource_group_enable_cgroup_cpuset) - ResGroupOps_SetCpuSet(callbackCtx->groupid, + cgroupOpsRoutine->setcpuset(callbackCtx->groupid, callbackCtx->caps.cpuset); } else if (callbackCtx->limittype != RESGROUP_LIMIT_TYPE_MEMORY_SPILL_RATIO) @@ -929,7 +952,7 @@ 
ResGroupAlterOnCommit(const ResourceGroupCallbackContext *callbackCtx) { char defaultCpusetGroup[MaxCpuSetLength]; /* get current default group value */ - ResGroupOps_GetCpuSet(DEFAULT_CPUSET_GROUP_ID, + cgroupOpsRoutine->getcpuset(DEFAULT_CPUSET_GROUP_ID, defaultCpusetGroup, MaxCpuSetLength); /* Add old value to default group @@ -940,7 +963,7 @@ ResGroupAlterOnCommit(const ResourceGroupCallbackContext *callbackCtx) CpusetDifference(defaultCpusetGroup, callbackCtx->caps.cpuset, MaxCpuSetLength); - ResGroupOps_SetCpuSet(DEFAULT_CPUSET_GROUP_ID, defaultCpusetGroup); + cgroupOpsRoutine->setcpuset(DEFAULT_CPUSET_GROUP_ID, defaultCpusetGroup); } } PG_CATCH(); @@ -1834,7 +1857,7 @@ decideResGroup(ResGroupInfo *pGroupInfo) if (!group) { - groupId = superuser() ? ADMINRESGROUP_OID : DEFAULTRESGROUP_OID; + groupId = superuser() ? GPDB_ADMIN_CGROUP : GPDB_DEFAULT_CGROUP; group = groupHashFind(groupId, false); } @@ -2114,7 +2137,7 @@ decideTotalChunks(int32 *totalChunks, int32 *chunkSizeInBits) nsegments = Gp_role == GP_ROLE_EXECUTE ? 
host_primary_segment_count : pResGroupControl->segmentsOnMaster; Assert(nsegments > 0); - tmptotalChunks = ResGroupOps_GetTotalMemory() * gp_resource_group_memory_limit / nsegments; + tmptotalChunks = getTotalMemory() * gp_resource_group_memory_limit / nsegments; /* * If vmem is larger than 16GB (i.e., 16K MB), we make the chunks bigger @@ -2637,9 +2660,8 @@ AssignResGroupOnMaster(void) self->bypassMemoryLimit = self->memUsage + RESGROUP_BYPASS_MODE_MEMORY_LIMIT_ON_QD; /* Add into cgroup */ - ResGroupOps_AssignGroup(bypassedGroup->groupId, - &bypassedGroup->caps, - MyProcPid); + cgroupOpsRoutine->attachcgroup(bypassedGroup->groupId, MyProcPid, + bypassedGroup->caps.cpuRateLimit == CPU_RATE_LIMIT_DISABLED); groupSetMemorySpillRatio(&bypassedGroup->caps); return; @@ -2667,7 +2689,8 @@ AssignResGroupOnMaster(void) SIMPLE_FAULT_INJECTOR("resgroup_assigned_on_master"); /* Add into cgroup */ - ResGroupOps_AssignGroup(self->groupId, &(self->caps), MyProcPid); + cgroupOpsRoutine->attachcgroup(self->groupId, MyProcPid, + self->caps.cpuRateLimit == CPU_RATE_LIMIT_DISABLED); /* Set spill guc */ groupSetMemorySpillRatio(&slot->caps); @@ -2851,11 +2874,12 @@ SwitchResGroupOnSegment(const char *buf, int len) LWLockRelease(ResGroupLock); - /* finally we can say we are in a valid resgroup */ + /* finally, we can say we are in a valid resgroup */ Assert(selfIsAssigned()); /* Add into cgroup */ - ResGroupOps_AssignGroup(self->groupId, &(self->caps), MyProcPid); + cgroupOpsRoutine->attachcgroup(self->groupId, MyProcPid, + self->caps.cpuRateLimit == CPU_RATE_LIMIT_DISABLED); } /* @@ -4101,25 +4125,25 @@ groupMemOnAlterForCgroup(Oid groupId, ResGroupData *group) static void groupApplyCgroupMemInc(ResGroupData *group) { - ResGroupCompType comp = RESGROUP_COMP_TYPE_MEMORY; - int32 memory_limit; - int32 memory_inc; + CGroupComponentType component = CGROUP_COMPONENT_MEMORY; + int32 memory_limit_chunks; + int32 memory_inc_chunks; int fd; Assert(LWLockHeldByMeInMode(ResGroupLock, 
LW_EXCLUSIVE)); Assert(group->memGap < 0); - memory_inc = mempoolReserve(group->groupId, group->memGap * -1); + memory_inc_chunks = mempoolReserve(group->groupId, group->memGap * -1); - if (memory_inc <= 0) + if (memory_inc_chunks <= 0) return; - fd = ResGroupOps_LockGroup(group->groupId, comp, true); - memory_limit = ResGroupOps_GetMemoryLimit(group->groupId); - ResGroupOps_SetMemoryLimitByValue(group->groupId, memory_limit + memory_inc); - ResGroupOps_UnLockGroup(group->groupId, fd); + fd = cgroupOpsRoutine->lockcgroup(group->groupId, component, true); + memory_limit_chunks = cgroupOpsRoutine->getmemorylimitchunks(group->groupId); + cgroupOpsRoutine->setmemorylimitbychunks(group->groupId, memory_limit_chunks + memory_inc_chunks); + cgroupOpsRoutine->unlockcgroup(fd); - group->memGap += memory_inc; + group->memGap += memory_inc_chunks; } /* @@ -4130,7 +4154,7 @@ groupApplyCgroupMemInc(ResGroupData *group) static void groupApplyCgroupMemDec(ResGroupData *group) { - ResGroupCompType comp = RESGROUP_COMP_TYPE_MEMORY; + CGroupComponentType component = CGROUP_COMPONENT_MEMORY; int32 memory_limit; int32 memory_dec; int fd; @@ -4138,14 +4162,14 @@ groupApplyCgroupMemDec(ResGroupData *group) Assert(LWLockHeldByMeInMode(ResGroupLock, LW_EXCLUSIVE)); Assert(group->memGap > 0); - fd = ResGroupOps_LockGroup(group->groupId, comp, true); - memory_limit = ResGroupOps_GetMemoryLimit(group->groupId); + fd = cgroupOpsRoutine->lockcgroup(group->groupId, component, true); + memory_limit = cgroupOpsRoutine->getmemorylimitchunks(group->groupId); Assert(memory_limit > group->memGap); memory_dec = group->memGap; - ResGroupOps_SetMemoryLimitByValue(group->groupId, memory_limit - memory_dec); - ResGroupOps_UnLockGroup(group->groupId, fd); + cgroupOpsRoutine->setmemorylimitbychunks(group->groupId, memory_limit - memory_dec); + cgroupOpsRoutine->unlockcgroup(fd); mempoolRelease(group->groupId, memory_dec); notifyGroupsOnMem(group->groupId); @@ -4192,10 +4216,10 @@ 
groupMemOnDumpForCgroup(ResGroupData *group, StringInfo str) appendStringInfo(str, "{"); appendStringInfo(str, "\"used\":%d, ", VmemTracker_ConvertVmemChunksToMB( - ResGroupOps_GetMemoryUsage(group->groupId) / ResGroupGetHostPrimaryCount())); + cgroupOpsRoutine->getmemoryusage(group->groupId) / ResGroupGetHostPrimaryCount())); appendStringInfo(str, "\"limit_granted\":%d", VmemTracker_ConvertVmemChunksToMB( - ResGroupOps_GetMemoryLimit(group->groupId) / ResGroupGetHostPrimaryCount())); + cgroupOpsRoutine->getmemorylimitchunks(group->groupId) / ResGroupGetHostPrimaryCount())); appendStringInfo(str, "}"); } @@ -4436,7 +4460,7 @@ cpusetOperation(char *cpuset1, const char *cpuset2, else { /* Get cpuset from cpuset/gpdb, and transform it into bitset */ - ResGroupOps_GetCpuSet(RESGROUP_ROOT_ID, cpuset, MaxCpuSetLength); + cgroupOpsRoutine->getcpuset(CGROUP_ROOT_ID, cpuset, MaxCpuSetLength); Bitmapset *bmsDefault = CpusetToBitset(cpuset, MaxCpuSetLength); /* get the minimum core number, in case of the zero core is not exist */ defaultCore = bms_next_member(bmsDefault, -1); @@ -4643,7 +4667,8 @@ HandleMoveResourceGroup(void) self->caps = slot->caps; /* Add into cgroup */ - ResGroupOps_AssignGroup(self->groupId, &(self->caps), MyProcPid); + cgroupOpsRoutine->attachcgroup(self->groupId, MyProcPid, + self->caps.cpuRateLimit == CPU_RATE_LIMIT_DISABLED); } PG_CATCH(); { @@ -4709,7 +4734,8 @@ HandleMoveResourceGroup(void) Assert(selfIsAssigned()); /* Add into cgroup */ - ResGroupOps_AssignGroup(self->groupId, &(self->caps), MyProcPid); + cgroupOpsRoutine->attachcgroup(self->groupId, MyProcPid, + self->caps.cpuRateLimit == CPU_RATE_LIMIT_DISABLED); } } diff --git a/src/backend/utils/resgroup/resgroup_helper.c b/src/backend/utils/resgroup/resgroup_helper.c index be83506ec33..e36cfc71bf3 100644 --- a/src/backend/utils/resgroup/resgroup_helper.c +++ b/src/backend/utils/resgroup/resgroup_helper.c @@ -24,7 +24,7 @@ #include "utils/builtins.h" #include "utils/datetime.h" #include 
"utils/resgroup.h" -#include "utils/resgroup-ops.h" +#include "utils/cgroup.h" #include "utils/resource_manager.h" typedef struct ResGroupStat @@ -65,7 +65,7 @@ calcCpuUsage(StringInfoData *str, appendStringInfo(str, "\"%d\":%.2f", GpIdentity.segindex, - ResGroupOps_ConvertCpuUsageToPercent(usage, duration)); + cgroupOpsRoutine->convertcpuusage(usage, duration)); } /* @@ -93,7 +93,7 @@ getResUsage(ResGroupStatCtx *ctx, Oid inGroupId) ResGroupStat *row = &ctx->groups[j]; Oid groupId = DatumGetObjectId(row->groupId); - usages[j] = ResGroupOps_GetCpuUsage(groupId); + usages[j] = cgroupOpsRoutine->getcpuusage(groupId); timestamps[j] = GetCurrentTimestamp(); } @@ -147,7 +147,7 @@ getResUsage(ResGroupStatCtx *ctx, Oid inGroupId) appendStringInfo(row->cpuUsage, "{"); calcCpuUsage(row->cpuUsage, usages[j], timestamps[j], - ResGroupOps_GetCpuUsage(groupId), + cgroupOpsRoutine->getcpuusage(groupId), GetCurrentTimestamp()); } @@ -181,7 +181,7 @@ getResUsage(ResGroupStatCtx *ctx, Oid inGroupId) GpIdentity.segindex, DatumGetCString(d)); calcCpuUsage(row->cpuUsage, usages[j], timestamps[j], - ResGroupOps_GetCpuUsage(groupId), + cgroupOpsRoutine->getcpuusage(groupId), GetCurrentTimestamp()); } } diff --git a/src/backend/utils/resource_manager/resource_manager.c b/src/backend/utils/resource_manager/resource_manager.c index 7853659dbd4..5097375bfb6 100644 --- a/src/backend/utils/resource_manager/resource_manager.c +++ b/src/backend/utils/resource_manager/resource_manager.c @@ -23,7 +23,7 @@ #include "utils/faultinjector.h" #include "utils/guc.h" #include "utils/resource_manager.h" -#include "utils/resgroup-ops.h" +#include "utils/cgroup.h" #include "utils/session_state.h" /* @@ -81,7 +81,8 @@ InitResManager(void) gp_resmanager_print_operator_memory_limits = &gp_resgroup_print_operator_memory_limits; InitResGroups(); - ResGroupOps_AdjustGUCs(); + + cgroupOpsRoutine->adjustgucs(); ResGroupActivated = true; } diff --git a/src/include/cdb/cdbvars.h b/src/include/cdb/cdbvars.h index 
86781bf424b..88b605fd074 100644 --- a/src/include/cdb/cdbvars.h +++ b/src/include/cdb/cdbvars.h @@ -712,6 +712,9 @@ extern bool coredump_on_memerror; /* Greenplum resource group query_mem re-calculate on QE */ extern bool gp_resource_group_enable_recalculate_query_mem; +/* Greenplum linux cgroup version, is enable version 2 */ +extern bool gp_resource_group_enable_cgroup_version_two; + /* * Autostats feature, whether or not to to automatically run ANALYZE after * insert/delete/update/ctas or after ctas/copy/insert in case the target diff --git a/src/include/utils/cgroup-ops-v1.h b/src/include/utils/cgroup-ops-v1.h new file mode 100644 index 00000000000..e61bcd33657 --- /dev/null +++ b/src/include/utils/cgroup-ops-v1.h @@ -0,0 +1,22 @@ +/*------------------------------------------------------------------------- + * + * cgroup-ops-v1.h + * GPDB resource group definitions. + * + * Copyright (c) 2017 VMware, Inc. or its affiliates. + * + * + * IDENTIFICATION + * src/include/utils/cgroup-ops-v1.h + * + *------------------------------------------------------------------------- + */ +#ifndef RES_GROUP_OPS_V1_H +#define RES_GROUP_OPS_V1_H + +#include "utils/cgroup.h" + +extern CGroupOpsRoutine *get_group_routine_alpha(void); +extern CGroupSystemInfo *get_cgroup_sysinfo_alpha(void); + +#endif /* RES_GROUP_OPS_V1_H */ diff --git a/src/include/utils/cgroup.h b/src/include/utils/cgroup.h new file mode 100644 index 00000000000..6838069f75a --- /dev/null +++ b/src/include/utils/cgroup.h @@ -0,0 +1,235 @@ +/*------------------------------------------------------------------------- + * + * cgroup.h + * Linux control group interface definitions. + * + * Portions Copyright (c) 2012-Present VMware, Inc. or its affiliates. + * + * + * IDENTIFICATION + * src/include/utils/cgroup.h + * + *------------------------------------------------------------------------- + */ + +#ifndef CGROUP_H +#define CGROUP_H + +#include "postgres.h" + +/* + * The pre-occupied group OID, do not change this! 
+ */ +#define GPDB_DEFAULT_CGROUP 6437 +#define GPDB_ADMIN_CGROUP 6438 +#define GPDB_SYSTEM_CGROUP 6441 + +#define MAX_CGROUP_PATHLEN 256 + +#define CGROUP_ERROR(...) elog(ERROR, __VA_ARGS__) +#define CGROUP_CONFIG_ERROR(...) \ + CGROUP_ERROR("cgroup is not properly configured: " __VA_ARGS__) + +#define FALLBACK_COMP_DIR "" +#define PROC_MOUNTS "/proc/self/mounts" +#define MAX_INT_STRING_LEN 20 +#define MAX_RETRY 10 + +/* + * Default cpuset group is a group manages the cpu cores which not belong to + * any other cpuset group. All the processes which not belong to any cpuset + * group will be run on cores in default cpuset group. It is a virtual group, + * can't be seen in gpdb. + */ +#define DEFAULT_CPUSET_GROUP_ID 1 +/* + * If cpu_rate_limit is set to this value, it means this feature is disabled + */ +#define CPU_RATE_LIMIT_DISABLED (-1) + +/* This is the default value about Linux Control Group */ +#define DEFAULT_CPU_PERIOD_US 100000LL + + +/* + * Resource Group underlying component types. + */ +typedef enum +{ + CGROUP_COMPONENT_FIRST = 0, + CGROUP_COMPONENT_UNKNOWN = -1, + CGROUP_COMPONENT_PLAIN = -2, + + /* + * let CGROUP_COMPONENT_CPU equals to CGROUP_COMPONENT_FIRST, + * it's convinent to loop all the control component from zero. + */ + CGROUP_COMPONENT_CPU = 0, + CGROUP_COMPONENT_CPUACCT, + CGROUP_COMPONENT_MEMORY, + CGROUP_COMPONENT_CPUSET, + + CGROUP_COMPONENT_COUNT, +} CGroupComponentType; + + +typedef enum +{ + BASEDIR_GPDB, /* translate to "/gpdb" */ + BASEDIR_PARENT, /* translate to "" */ +} BaseDirType; + +#define CGROUP_ROOT_ID (InvalidOid) + +typedef struct CGroupSystemInfo +{ + /* The number of CPU cores on this machine */ + int ncores; + + /* The cgroup mount dir */ + char cgroup_dir[MAX_CGROUP_PATHLEN]; + +} CGroupSystemInfo; + +/* Read at most datasize bytes from a file. */ +extern size_t readData(const char *path, char *data, size_t datasize); +/* Write datasize bytes to a file. 
*/ +extern void writeData(const char *path, const char *data, size_t datasize); + +/* Read an int64 value from a cgroup interface file. */ +extern int64 readInt64(Oid group, BaseDirType base, CGroupComponentType component, + const char *filename); +/* Write an int64 value to a cgroup interface file. */ +extern void writeInt64(Oid group, BaseDirType base, CGroupComponentType component, + const char *filename, int64 x); + +/* Read a string value from a cgroup interface file. */ +extern void readStr(Oid group, BaseDirType base, CGroupComponentType component, + const char *filename, char *str, int len); +/* Write a string value to a cgroup interface file. */ +extern void writeStr(Oid group, BaseDirType base, CGroupComponentType component, + const char *filename, const char *strValue); + +extern void buildPath(Oid group, BaseDirType base, CGroupComponentType component, + const char *filename, char *pathBuffer, size_t pathBufferSize); +extern bool buildPathSafe(Oid group, BaseDirType base, CGroupComponentType component, + const char *filename, char *pathBuffer, size_t pathBufferSize); + +extern bool validateComponentDir(CGroupComponentType component); + +extern const char * getComponentName(CGroupComponentType component); +extern CGroupComponentType getComponentType(const char *name); +extern const char *getComponentDir(CGroupComponentType component); +extern void setComponentDir(CGroupComponentType component, const char *dir); + +extern int lockDir(const char *path, bool block); + +/* Create cgroup dir. */ +extern bool createDir(Oid group, CGroupComponentType comp); +/* Delete cgroup dir. 
*/ +extern bool deleteDir(Oid group, CGroupComponentType component, const char *filename, bool unassign, + void (*detachcgroup) (Oid group, CGroupComponentType component, int fd_dir)); + +extern int getCPUCores(void); +extern bool getCgroupMountDir(void); +extern int getTotalMemory(void); + +/* + * Interfaces for OS dependent operations + */ + +typedef const char *(*getcgroupname_function) (void); + +/* Probe the configuration for the OS group implementation. */ +typedef bool (*probecgroup_function) (void); +/* Check whether the OS group implementation is available and usable. */ +typedef void (*checkcgroup_function) (void); + +/* Initialize the OS group. */ +typedef void (*initcgroup_function) (void); + +/* Adjust GUCs for this OS group implementation. */ +typedef void (*adjustgucs_function) (void); + +/* Create OS cgroup. */ +typedef void (*createcgroup_function) (Oid group); +/* Destroy OS cgroup. */ +typedef void (*destroycgroup_function) (Oid group, bool migrate); + +/* Attach a process to the OS cgroup. */ +typedef void (*attachcgroup_function) (Oid group, int pid, bool is_cpuset_enabled); +/* detach a process to the OS cgroup. */ +typedef void (*detachcgroup_function) (Oid group, CGroupComponentType component, int fd_dir); + +/* Lock the OS group. */ +typedef int (*lockcgroup_function) (Oid group, CGroupComponentType component, bool block); +/* Unlock the OS group. */ +typedef void (*unlockcgroup_function) (int fd); + +/* Set the cpu limit. */ +typedef void (*setcpulimit_function) (Oid group, int cpu_rate_limit); +/* Set the cpu share. */ +typedef void (*setcpushare_function) (Oid group, int cpu_share); + +/* Get the cpu usage of the OS group. 
*/ +typedef int64 (*getcpuusage_function) (Oid group); + +typedef int32 (*getmemoryusage_function) (Oid group); +typedef int32 (*getmemorylimitchunks_function) (Oid group); +typedef void (*setmemorylimit_function) (Oid group, int memory_limit); +typedef void (*setmemorylimitchunks_function) (Oid group, int32 chunks); + +/* Get the cpuset configuration of a cgroup. */ +typedef void (*getcpuset_function) (Oid group, char *cpuset, int len); + +/* Set the cpuset configuration of a cgroup. */ +typedef void (*setcpuset_function) (Oid group, const char *cpuset); + +/* Convert the cpu usage to percentage within the duration. */ +typedef float (*convertcpuusage_function) (int64 usage, int64 duration); + + +typedef struct CGroupOpsRoutine +{ + getcgroupname_function getcgroupname; + + probecgroup_function probecgroup; + checkcgroup_function checkcgroup; + + initcgroup_function initcgroup; + + adjustgucs_function adjustgucs; + + createcgroup_function createcgroup; + destroycgroup_function destroycgroup; + + attachcgroup_function attachcgroup; + detachcgroup_function detachcgroup; + + lockcgroup_function lockcgroup; + unlockcgroup_function unlockcgroup; + + setcpulimit_function setcpulimit; + + setcpushare_function setcpushare; + + getcpuusage_function getcpuusage; + + getmemoryusage_function getmemoryusage; + setmemorylimit_function setmemorylimit; + getmemorylimitchunks_function getmemorylimitchunks; + setmemorylimitchunks_function setmemorylimitbychunks; + + getcpuset_function getcpuset; + setcpuset_function setcpuset; + + convertcpuusage_function convertcpuusage; +} CGroupOpsRoutine; + +/* The global function handler. */ +extern CGroupOpsRoutine *cgroupOpsRoutine; + +/* The global system info. 
*/ +extern CGroupSystemInfo *cgroupSystemInfo; + +#endif /* CGROUP_H */ diff --git a/src/include/utils/resgroup-ops.h b/src/include/utils/resgroup-ops.h deleted file mode 100644 index 380c4d9dd24..00000000000 --- a/src/include/utils/resgroup-ops.h +++ /dev/null @@ -1,84 +0,0 @@ -/*------------------------------------------------------------------------- - * - * resgroup-ops.h - * GPDB resource group definitions. - * - * Copyright (c) 2017 VMware, Inc. or its affiliates. - * - * - * IDENTIFICATION - * src/include/utils/resgroup-ops.h - * - *------------------------------------------------------------------------- - */ -#ifndef RES_GROUP_OPS_H -#define RES_GROUP_OPS_H - -/* - * Resource Group underlying component types. - */ -typedef enum -{ - RESGROUP_COMP_TYPE_FIRST = 0, - RESGROUP_COMP_TYPE_UNKNOWN = -1, - - RESGROUP_COMP_TYPE_CPU, - RESGROUP_COMP_TYPE_CPUACCT, - RESGROUP_COMP_TYPE_MEMORY, - RESGROUP_COMP_TYPE_CPUSET, - - RESGROUP_COMP_TYPE_COUNT, -} ResGroupCompType; - -#define RESGROUP_ROOT_ID (InvalidOid) - -/* - * Default cpu group for postmaster process and it's auxiliary processes, such as - * BgWriter, SysLogger, WalWriter and so on. Because those auxiliary processes are - * created in different time and it's hard to add them into a same cgroup through - * their entrance, so we will create a default cpu group at the beginning of database - * start. - * - * This is a hard code programing, but we can't avoid it. - */ -#define RESGROUP_AUXILIARY_PROCESS_GROUP_ID 6441 - -/* - * Default cpuset group is a group manages the cpu cores which not belong to - * any other cpuset group. All the processes which not belong to any cpuset - * group will be run on cores in default cpuset group. It is a virtual group, - * can't be seen in gpdb. 
- */ -#define DEFAULT_CPUSET_GROUP_ID 1 -/* - * If cpu_rate_limit is set to this value, it means this feature is disabled - */ -#define CPU_RATE_LIMIT_DISABLED (-1) - -/* - * Interfaces for OS dependent operations - */ - -extern const char *ResGroupOps_Name(void); -extern bool ResGroupOps_Probe(void); -extern void ResGroupOps_Bless(void); -extern void ResGroupOps_Init(void); -extern void ResGroupOps_AdjustGUCs(void); -extern void ResGroupOps_CreateGroup(Oid group); -extern void ResGroupOps_DestroyGroup(Oid group, bool migrate); -extern void ResGroupOps_AssignGroup(Oid group, ResGroupCaps *caps, int pid); -extern int ResGroupOps_LockGroup(Oid group, ResGroupCompType comp, bool block); -extern void ResGroupOps_UnLockGroup(Oid group, int fd); -extern void ResGroupOps_SetCpuRateLimit(Oid group, int cpu_rate_limit); -extern void ResGroupOps_SetMemoryLimit(Oid group, int memory_limit); -extern void ResGroupOps_SetMemoryLimitByValue(Oid group, int32 memory_limit); -extern int64 ResGroupOps_GetCpuUsage(Oid group); -extern int32 ResGroupOps_GetMemoryUsage(Oid group); -extern int32 ResGroupOps_GetMemoryLimit(Oid group); -extern int ResGroupOps_GetCpuCores(void); -extern int ResGroupOps_GetTotalMemory(void); -extern void ResGroupOps_SetCpuSet(Oid group, const char *cpuset); -extern void ResGroupOps_GetCpuSet(Oid group, char *cpuset, int len); -float ResGroupOps_ConvertCpuUsageToPercent(int64 usage, int64 duration); - -#endif /* RES_GROUP_OPS_H */ diff --git a/src/include/utils/resgroup.h b/src/include/utils/resgroup.h index 3b7d558b053..ef3b3aafd5c 100644 --- a/src/include/utils/resgroup.h +++ b/src/include/utils/resgroup.h @@ -19,6 +19,7 @@ #include "cdb/memquota.h" #include "catalog/pg_resgroup.h" #include "utils/session_state.h" +#include "utils/cgroup.h" /* * The max number of resource groups. 
@@ -151,6 +152,7 @@ typedef struct extern Size ResGroupShmemSize(void); extern void ResGroupControlInit(void); +extern void initCgroup(void); /* Load resource group information from catalog */ extern void InitResGroups(void); diff --git a/src/include/utils/unsync_guc_name.h b/src/include/utils/unsync_guc_name.h index d0a030550a5..a751b6a4ca9 100644 --- a/src/include/utils/unsync_guc_name.h +++ b/src/include/utils/unsync_guc_name.h @@ -272,6 +272,7 @@ "gp_resource_group_cpu_ceiling_enforcement", "gp_resource_group_cpu_limit", "gp_resource_group_cpu_priority", + "gp_resource_group_enable_cgroup_version_two", "gp_resource_group_enable_recalculate_query_mem", "gp_resource_group_memory_limit", "gp_resource_group_queuing_timeout", diff --git a/src/test/isolation2/input/resgroup/resgroup_cpu_rate_limit.source b/src/test/isolation2/input/resgroup/resgroup_cpu_rate_limit.source index 28f26303f2e..6d927207991 100644 --- a/src/test/isolation2/input/resgroup/resgroup_cpu_rate_limit.source +++ b/src/test/isolation2/input/resgroup/resgroup_cpu_rate_limit.source @@ -141,7 +141,7 @@ DO LANGUAGE PLPYTHON3U $$ SELECT oid FROM pg_resgroup WHERE rsgname='{}' '''.format(name))[0]['oid']) sub_shares = get_cgroup_prop('/cpu/gpdb/{}/cpu.shares'.format(oid)) - assert sub_shares == shares * cpu_rate_limit / 100 + assert sub_shares == int(shares * cpu_rate_limit / 100) # check default groups check_group_shares('default_group') diff --git a/src/test/isolation2/output/resgroup/resgroup_cpu_rate_limit.source b/src/test/isolation2/output/resgroup/resgroup_cpu_rate_limit.source index baa5e382e8f..852ad5953a7 100644 --- a/src/test/isolation2/output/resgroup/resgroup_cpu_rate_limit.source +++ b/src/test/isolation2/output/resgroup/resgroup_cpu_rate_limit.source @@ -68,7 +68,7 @@ def show_guc(guc): return plpy.execute('SHOW {}'.format(guc))[0][guc] # get global gucs gp_resource_group_cpu_limit = float(show_guc('gp_resource_group_cpu_limit')) gp_resource_group_cpu_priority = 
int(show_guc('gp_resource_group_cpu_priority')) # cfs_quota_us := cfs_period_us * ncores * gp_resource_group_cpu_limit assert cfs_quota_us == cfs_period_us * ncores * gp_resource_group_cpu_limit # shares := 1024 * gp_resource_group_cpu_priority assert shares == 1024 * gp_resource_group_cpu_priority -# SUB/shares := TOP/shares * cpu_rate_limit def check_group_shares(name): cpu_rate_limit = int(plpy.execute(''' SELECT value FROM pg_resgroupcapability c, pg_resgroup g WHERE c.resgroupid=g.oid AND reslimittype=2 AND g.rsgname='{}' '''.format(name))[0]['value']) oid = int(plpy.execute(''' SELECT oid FROM pg_resgroup WHERE rsgname='{}' '''.format(name))[0]['oid']) sub_shares = get_cgroup_prop('/cpu/gpdb/{}/cpu.shares'.format(oid)) assert sub_shares == shares * cpu_rate_limit / 100 +# SUB/shares := TOP/shares * cpu_rate_limit def check_group_shares(name): cpu_rate_limit = int(plpy.execute(''' SELECT value FROM pg_resgroupcapability c, pg_resgroup g WHERE c.resgroupid=g.oid AND reslimittype=2 AND g.rsgname='{}' '''.format(name))[0]['value']) oid = int(plpy.execute(''' SELECT oid FROM pg_resgroup WHERE rsgname='{}' '''.format(name))[0]['oid']) sub_shares = get_cgroup_prop('/cpu/gpdb/{}/cpu.shares'.format(oid)) assert sub_shares == int(shares * cpu_rate_limit / 100) # check default groups check_group_shares('default_group') check_group_shares('admin_group') # check user groups check_group_shares('rg1_cpu_test') check_group_shares('rg2_cpu_test') $$; DO From 9aae16b108ed93807fd1fee049627faf0ae614ed Mon Sep 17 00:00:00 2001 From: Haotian Chen <108248800+CharlieTTXX@users.noreply.github.com> Date: Thu, 1 Dec 2022 10:14:22 +0800 Subject: [PATCH 32/46] NEW SYNTAX for resource group cpuset (#14332) NEW SYNTAX of resource group cpuset for different master and segment using syntax like cpuset="1;3-4" could different cpuset of master and segment by semicolon. As we define cpuset="1;3-4", master will apply the first cpu core, segments apply third and fourth core at same time. 
Differentiate mater and segment by seperating cpuset through semicolon, then apply the first half of it to master and second half to segment. --- src/backend/commands/resgroupcmds.c | 113 +++++++++++++++--- src/backend/utils/resgroup/resgroup.c | 24 ++-- src/include/utils/resgroup.h | 12 +- .../expected/resgroup/resgroup_syntax.out | 47 +++++++- .../input/resgroup/resgroup_cpuset.source | 19 +++ .../output/resgroup/resgroup_cpuset.source | 37 ++++++ .../sql/resgroup/resgroup_syntax.sql | 25 +++- 7 files changed, 250 insertions(+), 27 deletions(-) diff --git a/src/backend/commands/resgroupcmds.c b/src/backend/commands/resgroupcmds.c index c0efa7be4b1..92703cf3f8c 100644 --- a/src/backend/commands/resgroupcmds.c +++ b/src/backend/commands/resgroupcmds.c @@ -97,7 +97,7 @@ static void createResgroupCallback(XactEvent event, void *arg); static void dropResgroupCallback(XactEvent event, void *arg); static void alterResgroupCallback(XactEvent event, void *arg); static int getResGroupMemAuditor(char *name); -static bool checkCpusetSyntax(const char *cpuset); +static void checkCpusetSyntax(const char *cpuset); /* * CREATE RESOURCE GROUP @@ -250,13 +250,14 @@ CreateResourceGroup(CreateResourceGroupStmt *stmt) { EnsureCpusetIsAvailable(ERROR); - cgroupOpsRoutine->setcpuset(groupid, caps.cpuset); + char *cpuset = getCpuSetByRole(caps.cpuset); + cgroupOpsRoutine->setcpuset(groupid, cpuset); /* reset default group, subtract new group cpu cores */ char defaultGroupCpuset[MaxCpuSetLength]; cgroupOpsRoutine->getcpuset(DEFAULT_CPUSET_GROUP_ID, defaultGroupCpuset, MaxCpuSetLength); - CpusetDifference(defaultGroupCpuset, caps.cpuset, MaxCpuSetLength); + CpusetDifference(defaultGroupCpuset, cpuset, MaxCpuSetLength); cgroupOpsRoutine->setcpuset(DEFAULT_CPUSET_GROUP_ID, defaultGroupCpuset); } SIMPLE_FAULT_INJECTOR("create_resource_group_fail"); @@ -402,9 +403,8 @@ AlterResourceGroup(AlterResourceGroupStmt *stmt) else if (limitType == RESGROUP_LIMIT_TYPE_CPUSET) { 
EnsureCpusetIsAvailable(ERROR); - cpuset = defGetString(defel); - checkCpusetSyntax(cpuset); + checkCpuSetByRole(cpuset); } else { @@ -1013,8 +1013,8 @@ parseStmtOptions(CreateResourceGroupStmt *stmt, ResGroupCaps *caps) if (type == RESGROUP_LIMIT_TYPE_CPUSET) { const char *cpuset = defGetString(defel); - checkCpusetSyntax(cpuset); strlcpy(caps->cpuset, cpuset, sizeof(caps->cpuset)); + checkCpuSetByRole(cpuset); caps->cpuRateLimit = CPU_RATE_LIMIT_DISABLED; } else @@ -1294,18 +1294,20 @@ validateCapabilities(Relation rel, gp_resource_group_enable_cgroup_cpuset) { Bitmapset *bmsAll = NULL; + Bitmapset *bmsMissing = NULL; /* Get all available cores */ cgroupOpsRoutine->getcpuset(CGROUP_ROOT_ID, cpusetAll, MaxCpuSetLength); bmsAll = CpusetToBitset(cpusetAll, MaxCpuSetLength); + /* Check whether the cores in this group are available */ if (!CpusetIsEmpty(caps->cpuset)) { - Bitmapset *bmsMissing = NULL; + char *cpuset = getCpuSetByRole(caps->cpuset); + bmsCurrent = CpusetToBitset(cpuset, MaxCpuSetLength); - bmsCurrent = CpusetToBitset(caps->cpuset, MaxCpuSetLength); bmsCommon = bms_intersect(bmsCurrent, bmsAll); bmsMissing = bms_difference(bmsCurrent, bmsCommon); @@ -1315,8 +1317,8 @@ validateCapabilities(Relation rel, ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("cpu cores %s are unavailable on the system", - cpusetMissing))); + errmsg("cpu cores %s are unavailable on the system", + cpusetMissing))); } } } @@ -1398,7 +1400,8 @@ validateCapabilities(Relation rel, Assert(!bms_is_empty(bmsCurrent)); - bmsOther = CpusetToBitset(valueStr, MaxCpuSetLength); + char *cpuset = getCpuSetByRole(valueStr); + bmsOther = CpusetToBitset(cpuset, MaxCpuSetLength); bmsCommon = bms_intersect(bmsCurrent, bmsOther); if (!bms_is_empty(bmsCommon)) @@ -1549,16 +1552,22 @@ getResGroupMemAuditor(char *name) /* * check whether the cpuset value is syntactically right */ -static bool +static void checkCpusetSyntax(const char *cpuset) { + if (cpuset == NULL) + { + 
ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cpuset invalid"))); + } + if (strlen(cpuset) >= MaxCpuSetLength) { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("the length of cpuset reached the upper limit %d", MaxCpuSetLength))); - return false; } if (!CpusetToBitset(cpuset, strlen(cpuset))) @@ -1566,7 +1575,81 @@ checkCpusetSyntax(const char *cpuset) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("cpuset invalid"))); - return false; } - return true; + + return; +} + +/* + * Check Cpuset by coordinator and segment + */ +extern void +checkCpuSetByRole(const char *cpuset) +{ + char **arraycpuset = (char **)palloc0(sizeof(char *) * CpuSetArrayLength); + char *copycpuset = (char *)palloc0(sizeof(char) * MaxCpuSetLength); + strcpy(copycpuset, cpuset); + + int cnt = 0; + for (int i = 0; i < sizeof(cpuset); i++) + { + if (cpuset[i] == ';') + cnt++; + } + + if (cnt == 0) + { + checkCpusetSyntax(copycpuset); + arraycpuset[0] = copycpuset; + } + else if (cnt == 1) + { + int iter = 0; + char *nextcpuset = strtok(copycpuset, ";"); + while (nextcpuset != NULL) + { + arraycpuset[iter++] = nextcpuset; + nextcpuset = strtok(NULL, ";"); + } + checkCpusetSyntax(arraycpuset[0]); + checkCpusetSyntax(arraycpuset[1]); + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cpuset invalid"))); + + pfree(copycpuset); + pfree(arraycpuset); + return; +} + +/* + * Seperate cpuset by coordinator and segment + * Return as splitcpuset + */ +extern char * +getCpuSetByRole(const char *cpuset) +{ + int iter = 0; + char *splitcpuset = NULL; + + char **arraycpuset = (char **)palloc0(sizeof(char *) * CpuSetArrayLength); + char *copycpuset = (char *)palloc0(sizeof(char) * MaxCpuSetLength); + strcpy(copycpuset, cpuset); + + char *nextcpuset = strtok(copycpuset, ";"); + while (nextcpuset != NULL) + { + arraycpuset[iter++] = nextcpuset; + nextcpuset = strtok(NULL, ";"); + } + + /* Get result cpuset by gprole, on master or segment */ + if (Gp_role == 
GP_ROLE_EXECUTE && arraycpuset[1] != NULL) + splitcpuset = arraycpuset[1]; + else + splitcpuset = arraycpuset[0]; + + return splitcpuset; } diff --git a/src/backend/utils/resgroup/resgroup.c b/src/backend/utils/resgroup/resgroup.c index 7b7a32afa48..46dbc6457f1 100644 --- a/src/backend/utils/resgroup/resgroup.c +++ b/src/backend/utils/resgroup/resgroup.c @@ -637,6 +637,7 @@ InitResGroups(void) Oid groupId = ((Form_pg_resgroup) GETSTRUCT(tuple))->oid; ResGroupData *group; int cpuRateLimit; + Bitmapset *bmsCurrent; GetResGroupCapabilities(relResGroupCapability, groupId, &caps); cpuRateLimit = caps.cpuRateLimit; @@ -653,8 +654,9 @@ InitResGroups(void) } else { - Bitmapset *bmsCurrent = CpusetToBitset(caps.cpuset, - MaxCpuSetLength); + char *cpuset = getCpuSetByRole(caps.cpuset); + bmsCurrent = CpusetToBitset(cpuset, MaxCpuSetLength); + Bitmapset *bmsCommon = bms_intersect(bmsCurrent, bmsUnused); Bitmapset *bmsMissing = bms_difference(bmsCurrent, bmsCommon); @@ -679,7 +681,8 @@ InitResGroups(void) * write cpus to corresponding file * if all the cores are available */ - cgroupOpsRoutine->setcpuset(groupId, caps.cpuset); + char *cpuset= getCpuSetByRole(caps.cpuset); + cgroupOpsRoutine->setcpuset(groupId, cpuset); bmsUnused = bms_del_members(bmsUnused, bmsCurrent); } else @@ -931,8 +934,11 @@ ResGroupAlterOnCommit(const ResourceGroupCallbackContext *callbackCtx) else if (callbackCtx->limittype == RESGROUP_LIMIT_TYPE_CPUSET) { if (gp_resource_group_enable_cgroup_cpuset) + { + char *cpuset = getCpuSetByRole(callbackCtx->caps.cpuset); cgroupOpsRoutine->setcpuset(callbackCtx->groupid, - callbackCtx->caps.cpuset); + cpuset); + } } else if (callbackCtx->limittype != RESGROUP_LIMIT_TYPE_MEMORY_SPILL_RATIO) { @@ -957,12 +963,14 @@ ResGroupAlterOnCommit(const ResourceGroupCallbackContext *callbackCtx) MaxCpuSetLength); /* Add old value to default group * sub new value from default group */ + char *cpuset= getCpuSetByRole(callbackCtx->caps.cpuset); + char *oldcpuset = 
getCpuSetByRole(callbackCtx->oldCaps.cpuset); CpusetUnion(defaultCpusetGroup, - callbackCtx->oldCaps.cpuset, - MaxCpuSetLength); + oldcpuset, + MaxCpuSetLength); CpusetDifference(defaultCpusetGroup, - callbackCtx->caps.cpuset, - MaxCpuSetLength); + cpuset, + MaxCpuSetLength); cgroupOpsRoutine->setcpuset(DEFAULT_CPUSET_GROUP_ID, defaultCpusetGroup); } } diff --git a/src/include/utils/resgroup.h b/src/include/utils/resgroup.h index ef3b3aafd5c..a9e4f08d4e0 100644 --- a/src/include/utils/resgroup.h +++ b/src/include/utils/resgroup.h @@ -31,6 +31,11 @@ */ #define MaxCpuSetLength 1024 +/* + * The max length of cpuset array + */ +#define CpuSetArrayLength 2 + /* * Default value of cpuset */ @@ -85,8 +90,9 @@ typedef struct ResGroupCaps } ResGroupCaps; /* Set 'cpuset' to an empty string, and reset all other fields to zero */ -#define ClearResGroupCaps(caps) \ - MemSet((caps), 0, offsetof(ResGroupCaps, cpuset) + 1) +#define ClearResGroupCaps(caps) do { \ + MemSet((caps), 0, offsetof(ResGroupCaps, cpuset) + 1); \ +} while(0) /* @@ -229,6 +235,8 @@ extern void ResGroupMoveQuery(int sessionId, Oid groupId, const char *groupName) extern int32 ResGroupGetSessionMemUsage(int sessionId); extern int32 ResGroupGetGroupAvailableMem(Oid groupId); extern Oid ResGroupGetGroupIdBySessionId(int sessionId); +extern char *getCpuSetByRole(const char *cpuset); +extern void checkCpuSetByRole(const char *cpuset); #define LOG_RESGROUP_DEBUG(...) 
\ do {if (Debug_resource_group) elog(__VA_ARGS__); } while(false); diff --git a/src/test/isolation2/expected/resgroup/resgroup_syntax.out b/src/test/isolation2/expected/resgroup/resgroup_syntax.out index cba7d0ec561..0f14632550d 100644 --- a/src/test/isolation2/expected/resgroup/resgroup_syntax.out +++ b/src/test/isolation2/expected/resgroup/resgroup_syntax.out @@ -153,6 +153,17 @@ CREATE RESOURCE GROUP rg_test_group WITH (cpuset='3-1', memory_limit=5); ERROR: cpuset invalid CREATE RESOURCE GROUP rg_test_group WITH (cpuset=' 0 ', memory_limit=5); ERROR: cpuset invalid +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0;3-1', memory_limit=5); +ERROR: cpuset invalid +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='4;a', memory_limit=5); +ERROR: cpuset invalid +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='-;4', memory_limit=5); +ERROR: cpuset invalid +CREATE RESOURCE GROUP rg_test_group WITH (cpuset=';5', memory_limit=5); +ERROR: cpuset invalid +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='5;', memory_limit=5); +ERROR: cpuset invalid + ---- suppose the core numbered 1024 is not exist CREATE RESOURCE GROUP rg_test_group WITH (cpuset='1024', memory_limit=5); ERROR: cpu cores 1024 are unavailable on the system @@ -179,6 +190,22 @@ ALTER RESOURCE GROUP rg_test_group set CPUSET '3-1'; ERROR: cpuset invalid ALTER RESOURCE GROUP rg_test_group set CPUSET ' 0 '; ERROR: cpuset invalid +ALTER RESOURCE GROUP rg_test_group set CPUSET '5;3-1'; +ERROR: cpuset invalid +ALTER RESOURCE GROUP rg_test_group set CPUSET '4;a'; +ERROR: cpuset invalid +ALTER RESOURCE GROUP rg_test_group set CPUSET '-;4'; +ERROR: cpuset invalid +ALTER RESOURCE GROUP rg_test_group set CPUSET ';5'; +ERROR: cpuset invalid +ALTER RESOURCE GROUP rg_test_group set CPUSET '5;'; +ERROR: cpuset invalid +ALTER RESOURCE GROUP rg_test_group set CPUSET ';'; +ERROR: cpuset invalid +ALTER RESOURCE GROUP rg_test_group set CPUSET '1;2;'; +ERROR: cpuset invalid +ALTER RESOURCE GROUP rg_test_group set CPUSET 
'1;2;3'; +ERROR: cpuset invalid ---- suppose the core numbered 1024 is not exist ALTER RESOURCE GROUP rg_test_group set CPUSET '1024'; ERROR: cpu cores 1024 are unavailable on the system @@ -234,7 +261,15 @@ SELECT groupname,concurrency,cpu_rate_limit,memory_limit,memory_shared_quota,mem (1 row) DROP RESOURCE GROUP rg_test_group; DROP - +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='1;4-5'); +CREATE +SELECT groupname,concurrency,cpu_rate_limit,memory_limit,memory_shared_quota,memory_spill_ratio FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; + groupname | concurrency | cpu_rate_limit | memory_limit | memory_shared_quota | memory_spill_ratio +---------------+-------------+----------------+--------------+---------------------+-------------------- + rg_test_group | 20 | -1 | 0 | 80 | 0 +(1 row) +DROP RESOURCE GROUP rg_test_group; +DROP -- ---------------------------------------------------------------------- -- Test: boundary check in create resource group syntax -- ---------------------------------------------------------------------- @@ -613,3 +648,13 @@ ALTER RESOURCE GROUP rg_test_group SET memory_limit 0; ERROR: when memory_limit is unlimited memory_spill_ratio must be set to 0 DROP RESOURCE GROUP rg_test_group; DROP + +-- positive: test master/segment cpuset +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='1;4-5'); +CREATE +ALTER RESOURCE GROUP rg_test_group SET CPUSET '2;4-5'; +ALTER +ALTER RESOURCE GROUP rg_test_group SET CPUSET '3;4-5'; +ALTER +DROP RESOURCE GROUP rg_test_group; +DROP diff --git a/src/test/isolation2/input/resgroup/resgroup_cpuset.source b/src/test/isolation2/input/resgroup/resgroup_cpuset.source index f37a9d6dbac..2d7170e57be 100644 --- a/src/test/isolation2/input/resgroup/resgroup_cpuset.source +++ b/src/test/isolation2/input/resgroup/resgroup_cpuset.source @@ -291,6 +291,25 @@ SELECT gp_inject_fault('create_resource_group_fail', 'reset', 1); DROP RESOURCE GROUP rg1_test_group; -- end_ignore +-- test segment/master 
cpuset +CREATE RESOURCE GROUP rg_multi_cpuset1 WITH (concurrency=2, cpuset='0;1-2'); + +CREATE RESOURCE GROUP rg_multi_cpuset WITH (concurrency=2, cpuset='0;1-2'); +CREATE RESOURCE GROUP rg_multi_cpuset WITH (concurrency=2, cpuset='0'); + +CREATE RESOURCE GROUP rg_multi_cpuset2 WITH (concurrency=2, cpuset='3;4-5'); + +ALTER RESOURCE GROUP rg_multi_cpuset2 set CPUSET '0;1-3'; +ALTER RESOURCE GROUP rg_multi_cpuset2 set CPUSET '6'; +ALTER RESOURCE GROUP rg_multi_cpuset1 set CPUSET '3;4-5'; +ALTER RESOURCE GROUP rg_multi_cpuset2 set CPUSET '0;1-2'; + +select groupname,cpuset from gp_toolkit.gp_resgroup_config where groupname='rg_multi_cpuset1'; +select groupname,cpuset from gp_toolkit.gp_resgroup_config where groupname='rg_multi_cpuset2'; + +DROP RESOURCE GROUP rg_multi_cpuset1; +DROP RESOURCE GROUP rg_multi_cpuset2; + REVOKE ALL ON busy FROM role1_cpuset_test; DROP ROLE role1_cpuset_test; DROP RESOURCE GROUP rg1_cpuset_test; diff --git a/src/test/isolation2/output/resgroup/resgroup_cpuset.source b/src/test/isolation2/output/resgroup/resgroup_cpuset.source index 925cfe57959..7cfab076fde 100644 --- a/src/test/isolation2/output/resgroup/resgroup_cpuset.source +++ b/src/test/isolation2/output/resgroup/resgroup_cpuset.source @@ -284,6 +284,43 @@ SELECT gp_inject_fault('create_resource_group_fail', 'reset', 1); Success: (1 row) +-- test segment/master cpuset +CREATE RESOURCE GROUP rg_multi_cpuset1 WITH (concurrency=2, cpuset='0;1-2'); +CREATE + +CREATE RESOURCE GROUP rg_multi_cpuset WITH (concurrency=2, cpuset='0;1-2'); +ERROR: cpu cores 0 are used by resource group rg_multi_cpuset1 +CREATE RESOURCE GROUP rg_multi_cpuset WITH (concurrency=2, cpuset='0'); +ERROR: cpu cores 0 are used by resource group rg_multi_cpuset1 + +CREATE RESOURCE GROUP rg_multi_cpuset2 WITH (concurrency=2, cpuset='3;4-5'); +CREATE + +ALTER RESOURCE GROUP rg_multi_cpuset2 set CPUSET '0;1-3'; +ERROR: cpu cores 0 are used by resource group rg_multi_cpuset1 +ALTER RESOURCE GROUP rg_multi_cpuset2 set 
CPUSET '6'; +ALTER +ALTER RESOURCE GROUP rg_multi_cpuset1 set CPUSET '3;4-5'; +ALTER +ALTER RESOURCE GROUP rg_multi_cpuset2 set CPUSET '0;1-2'; +ALTER + +select groupname,cpuset from gp_toolkit.gp_resgroup_config where groupname='rg_multi_cpuset1'; + groupname | cpuset +------------------+-------- + rg_multi_cpuset1 | 3;4-5 +(1 row) +select groupname,cpuset from gp_toolkit.gp_resgroup_config where groupname='rg_multi_cpuset2'; + groupname | cpuset +------------------+-------- + rg_multi_cpuset2 | 0;1-2 +(1 row) + +DROP RESOURCE GROUP rg_multi_cpuset1; +DROP +DROP RESOURCE GROUP rg_multi_cpuset2; +DROP + REVOKE ALL ON busy FROM role1_cpuset_test; REVOKE DROP ROLE role1_cpuset_test; diff --git a/src/test/isolation2/sql/resgroup/resgroup_syntax.sql b/src/test/isolation2/sql/resgroup/resgroup_syntax.sql index 3a6a07e3c14..693cf9e9ab3 100644 --- a/src/test/isolation2/sql/resgroup/resgroup_syntax.sql +++ b/src/test/isolation2/sql/resgroup/resgroup_syntax.sql @@ -77,6 +77,12 @@ CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0-,', memory_limit=5); CREATE RESOURCE GROUP rg_test_group WITH (cpuset='-1', memory_limit=5); CREATE RESOURCE GROUP rg_test_group WITH (cpuset='3-1', memory_limit=5); CREATE RESOURCE GROUP rg_test_group WITH (cpuset=' 0 ', memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0;3-1', memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='4;a', memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='-;4', memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset=';5', memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='5;', memory_limit=5); + ---- suppose the core numbered 1024 is not exist CREATE RESOURCE GROUP rg_test_group WITH (cpuset='1024', memory_limit=5); CREATE RESOURCE GROUP rg_test_group WITH 
(cpuset='0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,', memory_limit=5); @@ -91,6 +97,14 @@ ALTER RESOURCE GROUP rg_test_group set CPUSET '0-'; ALTER RESOURCE GROUP rg_test_group set CPUSET '-1'; ALTER RESOURCE GROUP rg_test_group set CPUSET '3-1'; ALTER RESOURCE GROUP rg_test_group set CPUSET ' 0 '; +ALTER RESOURCE GROUP rg_test_group set CPUSET '5;3-1'; +ALTER RESOURCE GROUP rg_test_group set CPUSET '4;a'; +ALTER RESOURCE GROUP rg_test_group set CPUSET '-;4'; +ALTER RESOURCE GROUP rg_test_group set CPUSET ';5'; +ALTER RESOURCE GROUP rg_test_group set CPUSET '5;'; +ALTER RESOURCE GROUP rg_test_group set CPUSET ';'; +ALTER RESOURCE GROUP rg_test_group set CPUSET '1;2;'; +ALTER RESOURCE GROUP rg_test_group set CPUSET '1;2;3'; ---- suppose the core numbered 1024 is not exist ALTER RESOURCE GROUP rg_test_group set CPUSET '1024'; ALTER RESOURCE GROUP rg_test_group set CPUSET 
'0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,'; @@ -115,7 +129,10 @@ DROP RESOURCE GROUP rg_test_group; CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0'); SELECT groupname,concurrency,cpu_rate_limit,memory_limit,memory_shared_quota,memory_spill_ratio FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; DROP RESOURCE GROUP rg_test_group; - +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='1;4-5'); +SELECT groupname,concurrency,cpu_rate_limit,memory_limit,memory_shared_quota,memory_spill_ratio +FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; +DROP RESOURCE GROUP rg_test_group; -- ---------------------------------------------------------------------- -- Test: boundary check in create resource group syntax -- ---------------------------------------------------------------------- @@ -319,3 +336,9 @@ DROP RESOURCE GROUP rg_test_group; CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_spill_ratio=10); ALTER RESOURCE GROUP rg_test_group 
SET memory_limit 0; DROP RESOURCE GROUP rg_test_group; + +-- positive: test master/segment cpuset +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='1;4-5'); +ALTER RESOURCE GROUP rg_test_group SET CPUSET '2;4-5'; +ALTER RESOURCE GROUP rg_test_group SET CPUSET '3;4-5'; +DROP RESOURCE GROUP rg_test_group; From 4a0664888850e98bf37e9b0af6bafefa18c5144f Mon Sep 17 00:00:00 2001 From: Zhenglong Li Date: Mon, 5 Dec 2022 10:46:20 +0800 Subject: [PATCH 33/46] fix link problem in macOS and Windows (#14587) fix link problems in macOS and Windows which was introduced by #14343. --- src/backend/cdb/dispatcher/cdbdisp_query.c | 5 +- src/backend/executor/execMain.c | 5 +- src/backend/utils/resgroup/Makefile | 2 + src/backend/utils/resgroup/cgroup-ops-dummy.c | 314 ++++++++++++++++++ .../utils/resgroup/cgroup-ops-linux-v1.c | 81 +++++ src/backend/utils/resgroup/cgroup.c | 80 ----- src/backend/utils/resgroup/resgroup.c | 9 +- src/include/utils/cgroup-ops-dummy.h | 24 ++ src/include/utils/cgroup.h | 7 +- 9 files changed, 440 insertions(+), 87 deletions(-) create mode 100644 src/backend/utils/resgroup/cgroup-ops-dummy.c create mode 100644 src/include/utils/cgroup-ops-dummy.h diff --git a/src/backend/cdb/dispatcher/cdbdisp_query.c b/src/backend/cdb/dispatcher/cdbdisp_query.c index af72e05d9df..98374854bb1 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_query.c +++ b/src/backend/cdb/dispatcher/cdbdisp_query.c @@ -49,6 +49,9 @@ #include "cdb/cdbcopy.h" #include "executor/execUtils.h" +extern CGroupOpsRoutine *cgroupOpsRoutine; +extern CGroupSystemInfo *cgroupSystemInfo; + #define QUERY_STRING_TRUNCATE_SIZE (1024) extern bool Test_print_direct_dispatch_info; @@ -278,7 +281,7 @@ CdbDispatchPlan(struct QueryDesc *queryDesc, * We enable resource group re-calculate the query_mem on QE, and we are not in * fall back mode (use statement_mem). 
*/ - stmt->total_memory_coordinator = getTotalMemory(); + stmt->total_memory_coordinator = cgroupOpsRoutine->gettotalmemory(); stmt->nsegments_coordinator = ResGroupGetHostPrimaryCount(); } diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 83bb0007a67..bc9807e2431 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -115,6 +115,9 @@ #include "cdb/cdbutil.h" #include "cdb/cdbendpoint.h" +extern CGroupOpsRoutine *cgroupOpsRoutine; +extern CGroupSystemInfo *cgroupSystemInfo; + #define IS_PARALLEL_RETRIEVE_CURSOR(queryDesc) (queryDesc->ddesc && \ queryDesc->ddesc->parallelCursorName && \ strlen(queryDesc->ddesc->parallelCursorName) > 0) @@ -274,7 +277,7 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags) should_skip_operator_memory_assign = false; /* Get total system memory on the QE in MB */ - int total_memory_segment = getTotalMemory(); + int total_memory_segment = cgroupOpsRoutine->gettotalmemory(); int nsegments_segment = ResGroupGetHostPrimaryCount(); uint64 coordinator_query_mem = queryDesc->plannedstmt->query_mem; diff --git a/src/backend/utils/resgroup/Makefile b/src/backend/utils/resgroup/Makefile index 06b72952707..eea0ea3ba1b 100644 --- a/src/backend/utils/resgroup/Makefile +++ b/src/backend/utils/resgroup/Makefile @@ -18,6 +18,8 @@ OBJS = resgroup.o resgroup_helper.o ifeq ($(PORTNAME),linux) OBJS += cgroup.o OBJS += cgroup-ops-linux-v1.o +else +OBJS += cgroup-ops-dummy.o endif include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/utils/resgroup/cgroup-ops-dummy.c b/src/backend/utils/resgroup/cgroup-ops-dummy.c new file mode 100644 index 00000000000..ab7a6897ce4 --- /dev/null +++ b/src/backend/utils/resgroup/cgroup-ops-dummy.c @@ -0,0 +1,314 @@ +/*------------------------------------------------------------------------- + * + * cgroup-ops-dummy.c + * OS dependent resource group operations - dummy implementation + * + * Copyright (c) 2017 VMware, Inc. or its affiliates. 
+ * + * + * IDENTIFICATION + * src/backend/utils/resgroup/cgroup-ops-dummy.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "utils/cgroup.h" +#include "utils/cgroup-ops-dummy.h" + +/* + * Interfaces for OS dependent operations. + * + * Resource group relies on OS dependent group implementation to manage + * resources like cpu usage, such as cgroup on Linux system. + * We call it OS group in below function description. + * + * So far these operations are mainly for CPU rate limitation and accounting. + */ + +#define unsupported_system() \ + elog(WARNING, "resource group is not supported on this system") + + +static CGroupSystemInfo cgroupSystemInfoAlpha = { + 0, + "" +}; + +/* Return the name for the OS group implementation */ +static const char * +getcgroupname_dummy(void) +{ + return "unsupported"; +} + +/* + * Probe the configuration for the OS group implementation. + * + * Return true if everything is OK, or false is some requirements are not + * satisfied. Will not fail in either case. + */ +static bool +probecgroup_dummy(void) +{ + return false; +} + +/* Check whether the OS group implementation is available and usable */ +static void +checkcgroup_dummy(void) +{ + unsupported_system(); +} + +/* Initialize the OS group */ +static void +initcgroup_dummy(void) +{ + unsupported_system(); +} + +/* Adjust GUCs for this OS group implementation */ +static void +adjustgucs_dummy(void) +{ + unsupported_system(); +} + +/* + * Create the OS group for group. + */ +static void +createcgroup_dummy(Oid group) +{ + unsupported_system(); +} + +/* + * Assign a process to the OS group. A process can only be assigned to one + * OS group, if it's already running under other OS group then it'll be moved + * out that OS group. + * + * pid is the process id. 
+ */ +static void +attachcgroup_dummy(Oid group, int pid, bool is_cpuset_enabled) +{ + unsupported_system(); +} + +/* + * un-assign all the processes from a cgroup. + * + * These processes will be moved to the gpdb default cgroup. + * + * This function must be called with the gpdb toplevel dir locked, + * fd_dir is the fd for this lock, on any failure fd_dir will be closed + * (and unlocked implicitly) then an error is raised. + */ +static void +detachcgroup_dummy(Oid group, CGroupComponentType component, int fd_dir) +{ + unsupported_system(); +} + +/* + * Destroy the OS group for group. + * + * One OS group can not be dropped if there are processes running under it, + * if migrate is true these processes will be moved out automatically. + */ +static void +destroycgroup_dummy(Oid group, bool migrate) +{ + unsupported_system(); +} + +/* + * Lock the OS group. While the group is locked it won't be removed by other + * processes. + * + * This function would block if block is true, otherwise it returns with -1 + * immediately. + * + * On success, it returns a fd to the OS group, pass it to + * ResGroupOps_UnLockGroup() to unlock it. + */ +static int +lockcgroup_dummy(Oid group, CGroupComponentType component, bool block) +{ + unsupported_system(); + return -1; +} + +/* + * Unblock an OS group. + * + * fd is the value returned by lockcgroup_dummy(). + */ +static void +unlockcgroup_dummy(int fd) +{ + unsupported_system(); +} + +/* + * Set the cpu rate limit for the OS group. + * + * cpu_rate_limit should be within [0, 100]. + */ +static void +setcpulimit_dummy(Oid group, int cpu_rate_limit) +{ + unsupported_system(); +} + +/* + * Set the memory limit for the OS group by value. + * + * memory_limit is the limit value in chunks + * + * If cgroup supports memory swap, we will write the same limit to + * memory.memsw.limit and memory.limit. 
+ */ +static void +setmemorylimitbychunks_dummy(Oid group, int32 memory_limit_chunks) +{ + unsupported_system(); +} + +/* + * Set the memory limit for the OS group by rate. + * + * memory_limit should be within [0, 100]. + */ +static void +setmemorylimit_dummy(Oid group, int memory_limit) +{ + unsupported_system(); +} + +/* + * Get the cpu usage of the OS group, that is the total cpu time obtained + * by this OS group, in nano seconds. + */ +static int64 +getcpuusage_dummy(Oid group) +{ + unsupported_system(); + return 0; +} + +/* + * Get the memory usage of the OS group + * + * memory usage is returned in chunks + */ +static int32 +getmemoryusage_dummy(Oid group) +{ + unsupported_system(); + return 0; +} + +/* + * Get the memory limit of the OS group + * + * memory limit is returned in chunks + */ +static int32 +getmemorylimitchunks_dummy(Oid group) +{ + unsupported_system(); + return 0; +} + +/* + * Get the cpuset of the OS group. + * @param group: the destination group + * @param cpuset: the str to be set + * @param len: the upper limit of the str + */ +static void +getcpuset_dummy(Oid group, char *cpuset, int len) +{ + unsupported_system(); +} + +/* + * Set the cpuset for the OS group. + * @param group: the destination group + * @param cpuset: the value to be set + * The syntax of CPUSET is a combination of the tuples, each tuple represents + * one core number or the core numbers interval, separated by comma. + * E.g. 0,1,2-3. + */ +static void +setcpuset_dummy(Oid group, const char *cpuset) +{ + unsupported_system(); +} + +/* + * Convert the cpu usage to percentage within the duration. + * + * usage is the delta of GetCpuUsage() of a duration, + * duration is in micro seconds. + * + * When fully consuming one cpu core the return value will be 100.0 . 
+ */ +static float +convertcpuusage_dummy(int64 usage, int64 duration) +{ + unsupported_system(); + return 0.0; +} + +static int32 +gettotalmemory_dummy(void) +{ + unsupported_system(); + return 0.0; +} + +static CGroupOpsRoutine cGroupOpsRoutineDummy = { + .getcgroupname = getcgroupname_dummy, + + .probecgroup = probecgroup_dummy, + .checkcgroup = checkcgroup_dummy, + .initcgroup = initcgroup_dummy, + .adjustgucs = adjustgucs_dummy, + .createcgroup = createcgroup_dummy, + .destroycgroup = destroycgroup_dummy, + + .attachcgroup = attachcgroup_dummy, + .detachcgroup = detachcgroup_dummy, + + .lockcgroup = lockcgroup_dummy, + .unlockcgroup = unlockcgroup_dummy, + + .setcpulimit = setcpulimit_dummy, + .getcpuusage = getcpuusage_dummy, + + .gettotalmemory = gettotalmemory_dummy, + .getmemoryusage = getmemoryusage_dummy, + .setmemorylimit = setmemorylimit_dummy, + .getmemorylimitchunks = getmemorylimitchunks_dummy, + .setmemorylimitbychunks = setmemorylimitbychunks_dummy, + + .getcpuset = getcpuset_dummy, + .setcpuset = setcpuset_dummy, + + .convertcpuusage = convertcpuusage_dummy, +}; + +CGroupOpsRoutine *get_cgroup_routine_dummy(void) +{ + return &cGroupOpsRoutineDummy; +} + +CGroupSystemInfo *get_cgroup_sysinfo_dummy(void) +{ + return &cgroupSystemInfoAlpha; +} diff --git a/src/backend/utils/resgroup/cgroup-ops-linux-v1.c b/src/backend/utils/resgroup/cgroup-ops-linux-v1.c index d32eedc2e4f..7d3b774dfd2 100644 --- a/src/backend/utils/resgroup/cgroup-ops-linux-v1.c +++ b/src/backend/utils/resgroup/cgroup-ops-linux-v1.c @@ -1244,6 +1244,86 @@ getcpuusage_v1(Oid group) return readInt64(group, BASEDIR_GPDB, component, "cpuacct.usage"); } +/* get cgroup ram and swap (in Byte) */ +static void +get_cgroup_memory_info(uint64 *cgram, uint64 *cgmemsw) +{ + CGroupComponentType component = CGROUP_COMPONENT_MEMORY; + + *cgram = readInt64(CGROUP_ROOT_ID, BASEDIR_PARENT, + component, "memory.limit_in_bytes"); + + if (gp_resource_group_enable_cgroup_swap) + { + *cgmemsw = 
readInt64(CGROUP_ROOT_ID, BASEDIR_PARENT, + component, "memory.memsw.limit_in_bytes"); + } + else + { + elog(DEBUG1, "swap memory is unlimited"); + *cgmemsw = (uint64) -1LL; + } +} + +/* get total ram and total swap (in Byte) from sysinfo */ +static void +get_memory_info(unsigned long *ram, unsigned long *swap) +{ + struct sysinfo info; + if (sysinfo(&info) < 0) + elog(ERROR, "can't get memory information: %m"); + *ram = info.totalram; + *swap = info.totalswap; +} + +/* get vm.overcommit_ratio */ +static int +getOvercommitRatio(void) +{ + int ratio; + char data[MAX_INT_STRING_LEN]; + size_t datasize = sizeof(data); + const char *path = "/proc/sys/vm/overcommit_ratio"; + + readData(path, data, datasize); + + if (sscanf(data, "%d", &ratio) != 1) + elog(ERROR, "invalid number '%s' in '%s'", data, path); + + return ratio; +} + +static int +gettotalmemory_v1(void) +{ + unsigned long ram, swap, total; + int overcommitRatio; + uint64 cgram, cgmemsw; + uint64 memsw; + uint64 outTotal; + + overcommitRatio = getOvercommitRatio(); + get_memory_info(&ram, &swap); + /* Get sysinfo total ram and swap size. */ + memsw = ram + swap; + outTotal = swap + ram * overcommitRatio / 100; + get_cgroup_memory_info(&cgram, &cgmemsw); + ram = Min(ram, cgram); + /* + * In the case that total ram and swap read from sysinfo is larger than + * from cgroup, ram and swap must both be limited, otherwise swap must + * not be limited(we can safely use the value from sysinfo as swap size). + */ + if (cgmemsw < memsw) + swap = cgmemsw - ram; + /* + * If it is in container, the total memory is limited by both the total + * memoery outside and the memsw of the container. 
+ */ + total = Min(outTotal, swap + ram); + return total >> BITS_IN_MB; +} + /* * Get the memory usage of the OS group * @@ -1401,6 +1481,7 @@ static CGroupOpsRoutine cGroupOpsRoutineAlpha = { .getcpuset = getcpuset_v1, .setcpuset = setcpuset_v1, + .gettotalmemory = gettotalmemory_v1, .getmemoryusage = getmemoryusage_v1, .setmemorylimit = setmemorylimit_v1, .getmemorylimitchunks = getmemorylimitchunks_v1, diff --git a/src/backend/utils/resgroup/cgroup.c b/src/backend/utils/resgroup/cgroup.c index 03fd39dfc63..691172a173c 100644 --- a/src/backend/utils/resgroup/cgroup.c +++ b/src/backend/utils/resgroup/cgroup.c @@ -578,83 +578,3 @@ getCgroupMountDir() return strlen(cgroupSystemInfo->cgroup_dir) != 0; } - -/* get vm.overcommit_ratio */ -static int -getOvercommitRatio(void) -{ - int ratio; - char data[MAX_INT_STRING_LEN]; - size_t datasize = sizeof(data); - const char *path = "/proc/sys/vm/overcommit_ratio"; - - readData(path, data, datasize); - - if (sscanf(data, "%d", &ratio) != 1) - elog(ERROR, "invalid number '%s' in '%s'", data, path); - - return ratio; -} - -/* get cgroup ram and swap (in Byte) */ -static void -getCgMemoryInfo(uint64 *cgram, uint64 *cgmemsw) -{ - CGroupComponentType component = CGROUP_COMPONENT_MEMORY; - - *cgram = readInt64(CGROUP_ROOT_ID, BASEDIR_PARENT, - component, "memory.limit_in_bytes"); - - if (gp_resource_group_enable_cgroup_swap) - { - *cgmemsw = readInt64(CGROUP_ROOT_ID, BASEDIR_PARENT, - component, "memory.memsw.limit_in_bytes"); - } - else - { - elog(DEBUG1, "swap memory is unlimited"); - *cgmemsw = (uint64) -1LL; - } -} - -/* get total ram and total swap (in Byte) from sysinfo */ -static void -getMemoryInfo(unsigned long *ram, unsigned long *swap) -{ - struct sysinfo info; - if (sysinfo(&info) < 0) - elog(ERROR, "can't get memory information: %m"); - *ram = info.totalram; - *swap = info.totalswap; -} - -int -getTotalMemory(void) -{ - unsigned long ram, swap, total; - int overcommitRatio; - uint64 cgram, cgmemsw; - uint64 memsw; - 
uint64 outTotal; - - overcommitRatio = getOvercommitRatio(); - getMemoryInfo(&ram, &swap); - /* Get sysinfo total ram and swap size. */ - memsw = ram + swap; - outTotal = swap + ram * overcommitRatio / 100; - getCgMemoryInfo(&cgram, &cgmemsw); - ram = Min(ram, cgram); - /* - * In the case that total ram and swap read from sysinfo is larger than - * from cgroup, ram and swap must both be limited, otherwise swap must - * not be limited(we can safely use the value from sysinfo as swap size). - */ - if (cgmemsw < memsw) - swap = cgmemsw - ram; - /* - * If it is in container, the total memory is limited by both the total - * memoery outside and the memsw of the container. - */ - total = Min(outTotal, swap + ram); - return total >> BITS_IN_MB; -} diff --git a/src/backend/utils/resgroup/resgroup.c b/src/backend/utils/resgroup/resgroup.c index 46dbc6457f1..d0abff32e7f 100644 --- a/src/backend/utils/resgroup/resgroup.c +++ b/src/backend/utils/resgroup/resgroup.c @@ -75,6 +75,10 @@ #include "utils/session_state.h" #include "utils/vmem_tracker.h" #include "utils/cgroup-ops-v1.h" +#include "utils/cgroup-ops-dummy.h" + +extern CGroupOpsRoutine *cgroupOpsRoutine; +extern CGroupSystemInfo *cgroupSystemInfo; #define InvalidSlotId (-1) #define RESGROUP_MAX_SLOTS (MaxConnections) @@ -549,7 +553,8 @@ initCgroup(void) cgroupSystemInfo = get_cgroup_sysinfo_alpha(); } #else - elog(ERROR, "The resource group is not support on your operating system."); + cgroupOpsRoutine = get_cgroup_routine_dummy(); + cgroupSystemInfo = get_cgroup_sysinfo_dummy(); #endif bool probe_result = cgroupOpsRoutine->probecgroup(); @@ -2145,7 +2150,7 @@ decideTotalChunks(int32 *totalChunks, int32 *chunkSizeInBits) nsegments = Gp_role == GP_ROLE_EXECUTE ? 
host_primary_segment_count : pResGroupControl->segmentsOnMaster; Assert(nsegments > 0); - tmptotalChunks = getTotalMemory() * gp_resource_group_memory_limit / nsegments; + tmptotalChunks = cgroupOpsRoutine->gettotalmemory() * gp_resource_group_memory_limit / nsegments; /* * If vmem is larger than 16GB (i.e., 16K MB), we make the chunks bigger diff --git a/src/include/utils/cgroup-ops-dummy.h b/src/include/utils/cgroup-ops-dummy.h new file mode 100644 index 00000000000..c8b03331a22 --- /dev/null +++ b/src/include/utils/cgroup-ops-dummy.h @@ -0,0 +1,24 @@ +/*------------------------------------------------------------------------- + * + * cgroup-ops-dummy.h + * GPDB resource group definitions. + * + * Copyright (c) 2017 VMware, Inc. or its affiliates. + * + * + * IDENTIFICATION + * src/include/utils/cgroup-ops-dummy.h + * + * This file is for the OS that do not support cgroup, such as Windows, MacOS. + * + *------------------------------------------------------------------------- + */ +#ifndef RES_GROUP_OPS_DUMMY_H +#define RES_GROUP_OPS_DUMMY_H + +#include "utils/cgroup.h" + +extern CGroupOpsRoutine *get_cgroup_routine_dummy(void); +extern CGroupSystemInfo *get_cgroup_sysinfo_dummy(void); + +#endif /* RES_GROUP_OPS_DUMMY_H */ diff --git a/src/include/utils/cgroup.h b/src/include/utils/cgroup.h index 6838069f75a..15ec9794117 100644 --- a/src/include/utils/cgroup.h +++ b/src/include/utils/cgroup.h @@ -132,7 +132,6 @@ extern bool deleteDir(Oid group, CGroupComponentType component, const char *file extern int getCPUCores(void); extern bool getCgroupMountDir(void); -extern int getTotalMemory(void); /* * Interfaces for OS dependent operations @@ -174,6 +173,7 @@ typedef void (*setcpushare_function) (Oid group, int cpu_share); /* Get the cpu usage of the OS group. 
*/ typedef int64 (*getcpuusage_function) (Oid group); +typedef int32 (*gettotalmemory_function) (void); typedef int32 (*getmemoryusage_function) (Oid group); typedef int32 (*getmemorylimitchunks_function) (Oid group); typedef void (*setmemorylimit_function) (Oid group, int memory_limit); @@ -215,6 +215,7 @@ typedef struct CGroupOpsRoutine getcpuusage_function getcpuusage; + gettotalmemory_function gettotalmemory; getmemoryusage_function getmemoryusage; setmemorylimit_function setmemorylimit; getmemorylimitchunks_function getmemorylimitchunks; @@ -227,9 +228,9 @@ typedef struct CGroupOpsRoutine } CGroupOpsRoutine; /* The global function handler. */ -extern CGroupOpsRoutine *cgroupOpsRoutine; +CGroupOpsRoutine *cgroupOpsRoutine; /* The global system info. */ -extern CGroupSystemInfo *cgroupSystemInfo; +CGroupSystemInfo *cgroupSystemInfo; #endif /* CGROUP_H */ From 482cf31902e5bb721d157cee60d2b66cdf8b349a Mon Sep 17 00:00:00 2001 From: Haotian Chen <108248800+CharlieTTXX@users.noreply.github.com> Date: Mon, 5 Dec 2022 19:46:19 +0800 Subject: [PATCH 34/46] Fix resource group cpuset test case (#14601) Fix dev pipline failure of previous PR #14332. 
--- .../expected/resgroup/resgroup_syntax.out | 16 +--------- .../input/resgroup/resgroup_cpuset.source | 16 ++-------- .../output/resgroup/resgroup_cpuset.source | 29 ++----------------- .../sql/resgroup/resgroup_syntax.sql | 10 +------ 4 files changed, 7 insertions(+), 64 deletions(-) diff --git a/src/test/isolation2/expected/resgroup/resgroup_syntax.out b/src/test/isolation2/expected/resgroup/resgroup_syntax.out index 0f14632550d..4edaa812a9f 100644 --- a/src/test/isolation2/expected/resgroup/resgroup_syntax.out +++ b/src/test/isolation2/expected/resgroup/resgroup_syntax.out @@ -153,8 +153,6 @@ CREATE RESOURCE GROUP rg_test_group WITH (cpuset='3-1', memory_limit=5); ERROR: cpuset invalid CREATE RESOURCE GROUP rg_test_group WITH (cpuset=' 0 ', memory_limit=5); ERROR: cpuset invalid -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0;3-1', memory_limit=5); -ERROR: cpuset invalid CREATE RESOURCE GROUP rg_test_group WITH (cpuset='4;a', memory_limit=5); ERROR: cpuset invalid CREATE RESOURCE GROUP rg_test_group WITH (cpuset='-;4', memory_limit=5); @@ -190,8 +188,6 @@ ALTER RESOURCE GROUP rg_test_group set CPUSET '3-1'; ERROR: cpuset invalid ALTER RESOURCE GROUP rg_test_group set CPUSET ' 0 '; ERROR: cpuset invalid -ALTER RESOURCE GROUP rg_test_group set CPUSET '5;3-1'; -ERROR: cpuset invalid ALTER RESOURCE GROUP rg_test_group set CPUSET '4;a'; ERROR: cpuset invalid ALTER RESOURCE GROUP rg_test_group set CPUSET '-;4'; @@ -261,7 +257,7 @@ SELECT groupname,concurrency,cpu_rate_limit,memory_limit,memory_shared_quota,mem (1 row) DROP RESOURCE GROUP rg_test_group; DROP -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='1;4-5'); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0;0-1'); CREATE SELECT groupname,concurrency,cpu_rate_limit,memory_limit,memory_shared_quota,memory_spill_ratio FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; groupname | concurrency | cpu_rate_limit | memory_limit | memory_shared_quota | memory_spill_ratio @@ -648,13 +644,3 @@ 
ALTER RESOURCE GROUP rg_test_group SET memory_limit 0; ERROR: when memory_limit is unlimited memory_spill_ratio must be set to 0 DROP RESOURCE GROUP rg_test_group; DROP - --- positive: test master/segment cpuset -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='1;4-5'); -CREATE -ALTER RESOURCE GROUP rg_test_group SET CPUSET '2;4-5'; -ALTER -ALTER RESOURCE GROUP rg_test_group SET CPUSET '3;4-5'; -ALTER -DROP RESOURCE GROUP rg_test_group; -DROP diff --git a/src/test/isolation2/input/resgroup/resgroup_cpuset.source b/src/test/isolation2/input/resgroup/resgroup_cpuset.source index 2d7170e57be..80dda6dc8db 100644 --- a/src/test/isolation2/input/resgroup/resgroup_cpuset.source +++ b/src/test/isolation2/input/resgroup/resgroup_cpuset.source @@ -292,23 +292,11 @@ DROP RESOURCE GROUP rg1_test_group; -- end_ignore -- test segment/master cpuset -CREATE RESOURCE GROUP rg_multi_cpuset1 WITH (concurrency=2, cpuset='0;1-2'); - -CREATE RESOURCE GROUP rg_multi_cpuset WITH (concurrency=2, cpuset='0;1-2'); -CREATE RESOURCE GROUP rg_multi_cpuset WITH (concurrency=2, cpuset='0'); - -CREATE RESOURCE GROUP rg_multi_cpuset2 WITH (concurrency=2, cpuset='3;4-5'); - -ALTER RESOURCE GROUP rg_multi_cpuset2 set CPUSET '0;1-3'; -ALTER RESOURCE GROUP rg_multi_cpuset2 set CPUSET '6'; -ALTER RESOURCE GROUP rg_multi_cpuset1 set CPUSET '3;4-5'; -ALTER RESOURCE GROUP rg_multi_cpuset2 set CPUSET '0;1-2'; - +CREATE RESOURCE GROUP rg_multi_cpuset1 WITH (concurrency=2, cpuset='0;0'); +ALTER RESOURCE GROUP rg_multi_cpuset1 set CPUSET '1;1'; select groupname,cpuset from gp_toolkit.gp_resgroup_config where groupname='rg_multi_cpuset1'; -select groupname,cpuset from gp_toolkit.gp_resgroup_config where groupname='rg_multi_cpuset2'; DROP RESOURCE GROUP rg_multi_cpuset1; -DROP RESOURCE GROUP rg_multi_cpuset2; REVOKE ALL ON busy FROM role1_cpuset_test; DROP ROLE role1_cpuset_test; diff --git a/src/test/isolation2/output/resgroup/resgroup_cpuset.source b/src/test/isolation2/output/resgroup/resgroup_cpuset.source 
index 7cfab076fde..38eb29fc0c5 100644 --- a/src/test/isolation2/output/resgroup/resgroup_cpuset.source +++ b/src/test/isolation2/output/resgroup/resgroup_cpuset.source @@ -285,41 +285,18 @@ SELECT gp_inject_fault('create_resource_group_fail', 'reset', 1); (1 row) -- test segment/master cpuset -CREATE RESOURCE GROUP rg_multi_cpuset1 WITH (concurrency=2, cpuset='0;1-2'); +CREATE RESOURCE GROUP rg_multi_cpuset1 WITH (concurrency=2, cpuset='0;0'); CREATE - -CREATE RESOURCE GROUP rg_multi_cpuset WITH (concurrency=2, cpuset='0;1-2'); -ERROR: cpu cores 0 are used by resource group rg_multi_cpuset1 -CREATE RESOURCE GROUP rg_multi_cpuset WITH (concurrency=2, cpuset='0'); -ERROR: cpu cores 0 are used by resource group rg_multi_cpuset1 - -CREATE RESOURCE GROUP rg_multi_cpuset2 WITH (concurrency=2, cpuset='3;4-5'); -CREATE - -ALTER RESOURCE GROUP rg_multi_cpuset2 set CPUSET '0;1-3'; -ERROR: cpu cores 0 are used by resource group rg_multi_cpuset1 -ALTER RESOURCE GROUP rg_multi_cpuset2 set CPUSET '6'; -ALTER -ALTER RESOURCE GROUP rg_multi_cpuset1 set CPUSET '3;4-5'; -ALTER -ALTER RESOURCE GROUP rg_multi_cpuset2 set CPUSET '0;1-2'; +ALTER RESOURCE GROUP rg_multi_cpuset1 set CPUSET '1;1'; ALTER - select groupname,cpuset from gp_toolkit.gp_resgroup_config where groupname='rg_multi_cpuset1'; groupname | cpuset ------------------+-------- - rg_multi_cpuset1 | 3;4-5 -(1 row) -select groupname,cpuset from gp_toolkit.gp_resgroup_config where groupname='rg_multi_cpuset2'; - groupname | cpuset -------------------+-------- - rg_multi_cpuset2 | 0;1-2 + rg_multi_cpuset1 | 1;1 (1 row) DROP RESOURCE GROUP rg_multi_cpuset1; DROP -DROP RESOURCE GROUP rg_multi_cpuset2; -DROP REVOKE ALL ON busy FROM role1_cpuset_test; REVOKE diff --git a/src/test/isolation2/sql/resgroup/resgroup_syntax.sql b/src/test/isolation2/sql/resgroup/resgroup_syntax.sql index 693cf9e9ab3..bf0937b8663 100644 --- a/src/test/isolation2/sql/resgroup/resgroup_syntax.sql +++ b/src/test/isolation2/sql/resgroup/resgroup_syntax.sql 
@@ -77,7 +77,6 @@ CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0-,', memory_limit=5); CREATE RESOURCE GROUP rg_test_group WITH (cpuset='-1', memory_limit=5); CREATE RESOURCE GROUP rg_test_group WITH (cpuset='3-1', memory_limit=5); CREATE RESOURCE GROUP rg_test_group WITH (cpuset=' 0 ', memory_limit=5); -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0;3-1', memory_limit=5); CREATE RESOURCE GROUP rg_test_group WITH (cpuset='4;a', memory_limit=5); CREATE RESOURCE GROUP rg_test_group WITH (cpuset='-;4', memory_limit=5); CREATE RESOURCE GROUP rg_test_group WITH (cpuset=';5', memory_limit=5); @@ -97,7 +96,6 @@ ALTER RESOURCE GROUP rg_test_group set CPUSET '0-'; ALTER RESOURCE GROUP rg_test_group set CPUSET '-1'; ALTER RESOURCE GROUP rg_test_group set CPUSET '3-1'; ALTER RESOURCE GROUP rg_test_group set CPUSET ' 0 '; -ALTER RESOURCE GROUP rg_test_group set CPUSET '5;3-1'; ALTER RESOURCE GROUP rg_test_group set CPUSET '4;a'; ALTER RESOURCE GROUP rg_test_group set CPUSET '-;4'; ALTER RESOURCE GROUP rg_test_group set CPUSET ';5'; @@ -129,7 +127,7 @@ DROP RESOURCE GROUP rg_test_group; CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0'); SELECT groupname,concurrency,cpu_rate_limit,memory_limit,memory_shared_quota,memory_spill_ratio FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; DROP RESOURCE GROUP rg_test_group; -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='1;4-5'); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0;0-1'); SELECT groupname,concurrency,cpu_rate_limit,memory_limit,memory_shared_quota,memory_spill_ratio FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; DROP RESOURCE GROUP rg_test_group; @@ -336,9 +334,3 @@ DROP RESOURCE GROUP rg_test_group; CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_spill_ratio=10); ALTER RESOURCE GROUP rg_test_group SET memory_limit 0; DROP RESOURCE GROUP rg_test_group; - --- positive: test master/segment cpuset -CREATE RESOURCE GROUP rg_test_group 
WITH (cpuset='1;4-5'); -ALTER RESOURCE GROUP rg_test_group SET CPUSET '2;4-5'; -ALTER RESOURCE GROUP rg_test_group SET CPUSET '3;4-5'; -DROP RESOURCE GROUP rg_test_group; From 51fba837315c7e4fb6797d5ddcc0e2b2ceca3e21 Mon Sep 17 00:00:00 2001 From: Xing Guo Date: Tue, 6 Dec 2022 20:00:30 +0800 Subject: [PATCH 35/46] [7X] Fix multiple definition error when linking gpdb. (#14603) My linker complains that there's multiple definition of cgroupOpsRoutine and cgroupSystemInfo. We should declare the variable in header file with an extern tag and initialize it in one of the .c file. Since cgroupSystemInfo and cgroupOpsRoutine are required on multiple platforms, I initialize them in resgroup.c. --- src/backend/cdb/dispatcher/cdbdisp_query.c | 3 --- src/backend/executor/execMain.c | 3 --- src/backend/utils/resgroup/cgroup.c | 4 ---- src/backend/utils/resgroup/resgroup.c | 6 +++--- src/include/utils/cgroup.h | 4 ++-- 5 files changed, 5 insertions(+), 15 deletions(-) diff --git a/src/backend/cdb/dispatcher/cdbdisp_query.c b/src/backend/cdb/dispatcher/cdbdisp_query.c index 98374854bb1..3d55f03f3dc 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_query.c +++ b/src/backend/cdb/dispatcher/cdbdisp_query.c @@ -49,9 +49,6 @@ #include "cdb/cdbcopy.h" #include "executor/execUtils.h" -extern CGroupOpsRoutine *cgroupOpsRoutine; -extern CGroupSystemInfo *cgroupSystemInfo; - #define QUERY_STRING_TRUNCATE_SIZE (1024) extern bool Test_print_direct_dispatch_info; diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index bc9807e2431..0fb1704e04c 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -115,9 +115,6 @@ #include "cdb/cdbutil.h" #include "cdb/cdbendpoint.h" -extern CGroupOpsRoutine *cgroupOpsRoutine; -extern CGroupSystemInfo *cgroupSystemInfo; - #define IS_PARALLEL_RETRIEVE_CURSOR(queryDesc) (queryDesc->ddesc && \ queryDesc->ddesc->parallelCursorName && \ strlen(queryDesc->ddesc->parallelCursorName) > 0) diff --git 
a/src/backend/utils/resgroup/cgroup.c b/src/backend/utils/resgroup/cgroup.c index 691172a173c..d92f5ae4bfd 100644 --- a/src/backend/utils/resgroup/cgroup.c +++ b/src/backend/utils/resgroup/cgroup.c @@ -23,10 +23,6 @@ #include #include - -CGroupOpsRoutine *cgroupOpsRoutine; -CGroupSystemInfo *cgroupSystemInfo; - /* cgroup component names. */ const char *component_names[CGROUP_COMPONENT_COUNT] = { diff --git a/src/backend/utils/resgroup/resgroup.c b/src/backend/utils/resgroup/resgroup.c index d0abff32e7f..572f7fb8da9 100644 --- a/src/backend/utils/resgroup/resgroup.c +++ b/src/backend/utils/resgroup/resgroup.c @@ -77,9 +77,6 @@ #include "utils/cgroup-ops-v1.h" #include "utils/cgroup-ops-dummy.h" -extern CGroupOpsRoutine *cgroupOpsRoutine; -extern CGroupSystemInfo *cgroupSystemInfo; - #define InvalidSlotId (-1) #define RESGROUP_MAX_SLOTS (MaxConnections) @@ -276,6 +273,9 @@ bool gp_resource_group_enable_cgroup_memory = false; bool gp_resource_group_enable_cgroup_swap = false; bool gp_resource_group_enable_cgroup_cpuset = false; +CGroupOpsRoutine *cgroupOpsRoutine = NULL; +CGroupSystemInfo *cgroupSystemInfo = NULL; + /* hooks */ resgroup_assign_hook_type resgroup_assign_hook = NULL; diff --git a/src/include/utils/cgroup.h b/src/include/utils/cgroup.h index 15ec9794117..27e5e9f369f 100644 --- a/src/include/utils/cgroup.h +++ b/src/include/utils/cgroup.h @@ -228,9 +228,9 @@ typedef struct CGroupOpsRoutine } CGroupOpsRoutine; /* The global function handler. */ -CGroupOpsRoutine *cgroupOpsRoutine; +extern CGroupOpsRoutine *cgroupOpsRoutine; /* The global system info. 
*/ -CGroupSystemInfo *cgroupSystemInfo; +extern CGroupSystemInfo *cgroupSystemInfo; #endif /* CGROUP_H */ From cecfa9998923c86746967bc65db4bebdaa95b042 Mon Sep 17 00:00:00 2001 From: Haotian Chen <108248800+CharlieTTXX@users.noreply.github.com> Date: Fri, 16 Dec 2022 19:02:06 +0800 Subject: [PATCH 36/46] Simplify RG cpuset seperated by coordinator/segment (#14637) Simplify and refactor some codes of RG cpuset seperated by coordinator/segment. This commit if for enhenceing previous PR https://github.com/greenplum-db/gpdb/pull/14332. authored-by: chaotian --- src/backend/commands/resgroupcmds.c | 91 ++++++++++++++++------------- src/include/utils/resgroup.h | 5 -- 2 files changed, 52 insertions(+), 44 deletions(-) diff --git a/src/backend/commands/resgroupcmds.c b/src/backend/commands/resgroupcmds.c index 92703cf3f8c..dc1a4b390ac 100644 --- a/src/backend/commands/resgroupcmds.c +++ b/src/backend/commands/resgroupcmds.c @@ -1586,70 +1586,83 @@ checkCpusetSyntax(const char *cpuset) extern void checkCpuSetByRole(const char *cpuset) { - char **arraycpuset = (char **)palloc0(sizeof(char *) * CpuSetArrayLength); - char *copycpuset = (char *)palloc0(sizeof(char) * MaxCpuSetLength); - strcpy(copycpuset, cpuset); + char *first = NULL; + char *last = NULL; - int cnt = 0; - for (int i = 0; i < sizeof(cpuset); i++) + if (cpuset == NULL) { - if (cpuset[i] == ';') - cnt++; + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cpuset invalid"))); } - if (cnt == 0) - { - checkCpusetSyntax(copycpuset); - arraycpuset[0] = copycpuset; - } - else if (cnt == 1) + first = strchr(cpuset, ';'); + last = strrchr(cpuset, ';'); + /* + * If point first not equal last, that means + * ';' character exceed limit numbers. 
+ */ + if (last != first) { - int iter = 0; - char *nextcpuset = strtok(copycpuset, ";"); - while (nextcpuset != NULL) - { - arraycpuset[iter++] = nextcpuset; - nextcpuset = strtok(NULL, ";"); - } - checkCpusetSyntax(arraycpuset[0]); - checkCpusetSyntax(arraycpuset[1]); - } - else ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("cpuset invalid"))); + } + + if (first == NULL) + checkCpusetSyntax(cpuset); + else + { + char mcpu[MaxCpuSetLength] = {0}; + strncpy(mcpu, cpuset, first - cpuset); + + checkCpusetSyntax(mcpu); + checkCpusetSyntax(first + 1); + } - pfree(copycpuset); - pfree(arraycpuset); return; } /* * Seperate cpuset by coordinator and segment * Return as splitcpuset + * + * ex: + * cpuset = "1;4" + * then we should assign '1' to corrdinator and '4' to segment + * + * cpuset = "1" + * assign '1' to both coordinator and segment */ extern char * getCpuSetByRole(const char *cpuset) { - int iter = 0; char *splitcpuset = NULL; - char **arraycpuset = (char **)palloc0(sizeof(char *) * CpuSetArrayLength); - char *copycpuset = (char *)palloc0(sizeof(char) * MaxCpuSetLength); - strcpy(copycpuset, cpuset); - - char *nextcpuset = strtok(copycpuset, ";"); - while (nextcpuset != NULL) + if (cpuset == NULL) { - arraycpuset[iter++] = nextcpuset; - nextcpuset = strtok(NULL, ";"); + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("Unexpected cpuset invalid in getCpuSetByRole"))); } - /* Get result cpuset by gprole, on master or segment */ - if (Gp_role == GP_ROLE_EXECUTE && arraycpuset[1] != NULL) - splitcpuset = arraycpuset[1]; + char *first = strchr(cpuset, ';'); + if (first == NULL) + splitcpuset = (char *)cpuset; else - splitcpuset = arraycpuset[0]; + { + char *scpu = first + 1; + + /* Get result cpuset by IS_QUERY_DISPATCHER(), on master or segment */ + if (IS_QUERY_DISPATCHER()) + splitcpuset = scpu; + else + { + char *mcpu = (char *)palloc0(sizeof(char) * MaxCpuSetLength); + strncpy(mcpu, cpuset, first - cpuset); + splitcpuset = mcpu; + } + } return 
splitcpuset; } diff --git a/src/include/utils/resgroup.h b/src/include/utils/resgroup.h index a9e4f08d4e0..81a25d1ffa4 100644 --- a/src/include/utils/resgroup.h +++ b/src/include/utils/resgroup.h @@ -31,11 +31,6 @@ */ #define MaxCpuSetLength 1024 -/* - * The max length of cpuset array - */ -#define CpuSetArrayLength 2 - /* * Default value of cpuset */ From d11df4b8b0f9c1f60df6e12e77bd5c2d973c97b4 Mon Sep 17 00:00:00 2001 From: Zhenglong Li Date: Fri, 23 Dec 2022 15:36:32 +0800 Subject: [PATCH 37/46] add new semantics on resource group and removing memory model. (#14562) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After #14343, it's time to remove all the relevant codes and test cases about the resource group memory manager. 1. What this PR has done This PR did most 2 important things: First, the most important is to remove all the codes and test cases about resource group memory model, which includes the functions, variables, GUCs, etc. add new semantics on resource group and removing memory model. Since pg_resgroupcapability.reslimittype is consistent with the enumerated type ResGroupLimitType, when we delete `RESGROUP_LIMIT_TYPE_MEMORY and other content, there will be "holes". In order to avoid more PR and review work, this PR deleted memory model and added new semantics. The GUC this PR removed: gp_resource_group_memory_limit, gp_resgroup_memory_policy, memory_spill_ratio, gp_log_resgroup_memory, gp_resgroup_memory_policy_auto_fixed_mem gp_resource_group_cpu_ceiling_enforcement, gp_resgroup_print_operator_memory_limits gp_resource_group_enable_recalculate_query_mem. 2. New Resource Group Attributes and Limits New Resource group attributes and limits: - concurrenty. The maximum number of concurrent transactions, including active and idle transactions, that are permitted in the resource group. - cpu_hard_quoata_limit. The percentage of CPU resources hard limit to this resource group. 
This value indicates the maximum CPU ratio that the current group can use. - cpu_soft_priority. The current group CPU priority, the larger the value, the higher the priority, the more likely to be scheduled by the CPU, the default value is 100. - cpuset. The CPU cores to reserve for this resource group. First, let's take a look at the new resource management view of resource group: postgres=# select * from gp_toolkit.gp_resgroup_config; groupid | groupname | concurrency | cpu_hard_quota_limit | cpu_soft_priority | cpuset ---------+---------------+-------------+----------------------+-------------------+-------- 6437 | default_group | 20 | 20 | 100 | -1 6438 | admin_group | 10 | -1 | 300 | -1 6441 | system_group | 0 | 10 | 100 | -1 (3 rows) 2.1 What's the meaning of cpu_hard_quota_limit It can be seen that cpu_rate_limit is removed and replaced by cpu_hard_quota_limit, which indicates the upper limit of CPU resources that the current group can use. This is a percentage, taking 20 as an example, it means that the CPU resources used by the current group cannot exceed 20% of the total CPU resources of the Host. The sum of cpu_hard_quota_limit of all groups can exceed 100, and the range of this value is [1, 100] or -1, where 100 and -1 both mean that all CPU resources can be used, and no CPU resource limit is imposed on it. When we change the value of cpu_hard_quota_limit, will write cpu.cfs_period_us * ncores * cpu_hard_quota_limit / 100 to the file cpu.cfs_quota_us. 2.2 What's the meaning of cpu_soft_priority We have added cpu_soft_priority this field, which is used to indicate the CPU priority of the current group, corresponding to the dynamic running load weight in Linux CFS. The larger the value, the greater the weight of the group, and it will be scheduled more preferentially by the Linux scheduling process. The value range is [1, +∞], currently, the value cannot exceed 2^64 - 1. The default value is 100. 
When we change the value of cpu_soft_priority, will write (int64)(cpu_soft_priority * 1024 / 100) to the file cpu.shares. --- doc/src/sgml/ref/alter_resource_group.sgml | 6 +- doc/src/sgml/ref/create_resource_group.sgml | 7 +- src/backend/catalog/gp_toolkit.sql | 34 +- src/backend/cdb/dispatcher/cdbdisp_query.c | 16 - src/backend/commands/resgroupcmds.c | 368 +--- src/backend/executor/execMain.c | 73 +- src/backend/nodes/copyfuncs.c | 3 - src/backend/nodes/outfuncs.c | 3 - src/backend/nodes/readfuncs.c | 3 - src/backend/parser/gram.y | 44 +- src/backend/utils/misc/guc_gp.c | 88 +- src/backend/utils/mmgr/redzone_handler.c | 31 +- src/backend/utils/mmgr/runaway_cleaner.c | 19 +- src/backend/utils/mmgr/vmem_tracker.c | 32 +- .../utils/resgroup/cgroup-ops-linux-v1.c | 283 +-- src/backend/utils/resgroup/cgroup.c | 4 +- src/backend/utils/resgroup/resgroup.c | 1808 +---------------- src/backend/utils/resgroup/resgroup_helper.c | 50 +- src/backend/utils/resource_manager/memquota.c | 74 +- .../utils/resource_manager/resource_manager.c | 5 +- src/bin/pg_dump/pg_dumpall.c | 108 +- src/bin/psql/tab-complete.c | 2 +- src/include/catalog/pg_proc.dat | 12 +- src/include/catalog/pg_resgroup.h | 11 +- src/include/catalog/pg_resgroupcapability.dat | 21 +- src/include/cdb/cdbvars.h | 10 +- src/include/commands/resgroupcmds.h | 8 - src/include/nodes/plannodes.h | 3 - src/include/parser/kwlist.h | 5 +- src/include/utils/cgroup.h | 25 +- src/include/utils/linux-ops.h | 0 src/include/utils/resgroup.h | 60 +- src/include/utils/resscheduler.h | 1 - src/include/utils/sync_guc_name.h | 2 - src/include/utils/unsync_guc_name.h | 4 - .../expected/resgroup_current_1_group.out | 10 +- .../expected/resgroup_current_1_queue.out | 10 +- .../expected/resgroup_current_3_group.out | 10 +- .../expected/resgroup_current_3_queue.out | 10 +- .../expected/resgroup_other_2_group.out | 10 +- .../expected/resgroup_other_2_queue.out | 10 +- .../sql/resgroup_current_1_group.sql | 10 +- 
.../sql/resgroup_current_1_queue.sql | 10 +- .../sql/resgroup_current_3_group.sql | 10 +- .../sql/resgroup_current_3_queue.sql | 10 +- .../sql/resgroup_other_2_group.sql | 10 +- .../sql/resgroup_other_2_queue.sql | 10 +- .../resgroup/resgroup_alter_concurrency.out | 8 +- .../resgroup_alter_memory_spill_ratio.out | 74 - .../resgroup/resgroup_assign_slot_fail.out | 2 +- .../resgroup/resgroup_bypass_memory_limit.out | 960 --------- .../resgroup_cancel_terminate_concurrency.out | 10 +- .../resgroup/resgroup_concurrency.out | 24 +- .../resgroup_cpuset_empty_default.out | 2 +- .../expected/resgroup/resgroup_dumpinfo.out | 6 +- .../expected/resgroup/resgroup_functions.out | 4 +- .../resgroup/resgroup_large_group_id.out | 2 +- .../resgroup_memory_hashagg_spill.out | 161 -- .../resgroup_memory_hashjoin_spill.out | 105 - .../resgroup_memory_materialize_spill.out | 122 -- .../resgroup_memory_sisc_mat_sort.out | 102 - .../resgroup_memory_sisc_sort_spill.out | 100 - .../resgroup/resgroup_memory_sort_spill.out | 87 - .../resgroup/resgroup_memory_spilltodisk.out | 163 -- .../resgroup/resgroup_name_convention.out | 108 +- .../resgroup/resgroup_operator_memory.out | 28 +- .../resgroup/resgroup_parallel_queries.out | 30 +- .../expected/resgroup/resgroup_recreate.out | 4 +- .../resgroup/resgroup_seg_down_2pc.out | 2 +- .../resgroup_set_memory_spill_ratio.out | 197 -- .../expected/resgroup/resgroup_syntax.out | 498 ++--- .../resgroup/resgroup_transaction.out | 82 +- .../resgroup/resgroup_unassign_entrydb.out | 2 +- .../resgroup_unlimit_memory_spill_ratio.out | 155 -- .../expected/resgroup/resgroup_views.out | 88 +- .../input/resgroup/disable_resgroup.source | 9 +- .../input/resgroup/enable_resgroup.source | 43 - .../resgroup/resgroup_alter_memory.source | 545 ----- .../input/resgroup/resgroup_bypass.source | 228 +-- .../resgroup/resgroup_cpu_rate_limit.source | 56 +- .../input/resgroup/resgroup_cpuset.source | 37 +- .../resgroup/resgroup_memory_limit.source | 504 ----- 
.../resgroup/resgroup_memory_runaway.source | 171 -- .../resgroup/resgroup_memory_statistic.source | 155 -- .../input/resgroup/resgroup_move_query.source | 75 +- .../isolation2/isolation2_resgroup_schedule | 21 +- .../output/resgroup/disable_resgroup.source | 12 - .../output/resgroup/enable_resgroup.source | 42 +- .../resgroup/resgroup_alter_memory.source | 6 +- .../output/resgroup/resgroup_bypass.source | 385 +--- .../resgroup/resgroup_cpu_rate_limit.source | 108 +- .../output/resgroup/resgroup_cpuset.source | 50 +- .../resgroup/resgroup_memory_limit.source | 18 +- .../resgroup/resgroup_memory_runaway.source | 10 +- .../resgroup/resgroup_memory_statistic.source | 4 +- .../resgroup/resgroup_move_query.source | 152 +- .../resgroup/resgroup_alter_concurrency.sql | 193 +- .../resgroup_alter_memory_spill_ratio.sql | 39 - .../resgroup/resgroup_assign_slot_fail.sql | 2 +- .../resgroup/resgroup_bypass_memory_limit.sql | 482 ----- .../resgroup_cancel_terminate_concurrency.sql | 10 +- .../sql/resgroup/resgroup_concurrency.sql | 59 +- .../resgroup_cpuset_empty_default.sql | 2 +- .../sql/resgroup/resgroup_dumpinfo.sql | 9 +- .../sql/resgroup/resgroup_functions.sql | 4 +- .../sql/resgroup/resgroup_large_group_id.sql | 2 +- .../resgroup_memory_hashagg_spill.sql | 127 -- .../resgroup_memory_hashjoin_spill.sql | 72 - .../resgroup_memory_materialize_spill.sql | 104 - .../resgroup_memory_sisc_mat_sort.sql | 101 - .../resgroup_memory_sisc_sort_spill.sql | 105 - .../resgroup/resgroup_memory_sort_spill.sql | 68 - .../resgroup/resgroup_memory_spilltodisk.sql | 70 - .../sql/resgroup/resgroup_name_convention.sql | 94 +- .../sql/resgroup/resgroup_operator_memory.sql | 193 -- .../resgroup/resgroup_parallel_queries.sql | 37 +- .../sql/resgroup/resgroup_query_mem.sql | 63 - .../sql/resgroup/resgroup_recreate.sql | 6 +- .../sql/resgroup/resgroup_seg_down_2pc.sql | 2 +- .../resgroup_set_memory_spill_ratio.sql | 83 - .../sql/resgroup/resgroup_syntax.sql | 298 +-- 
.../sql/resgroup/resgroup_transaction.sql | 18 +- .../resgroup/resgroup_unassign_entrydb.sql | 2 +- .../resgroup_unlimit_memory_spill_ratio.sql | 85 - .../sql/resgroup/resgroup_views.sql | 6 - .../sql/resgroup/restore_default_resgroup.sql | 2 - src/test/regress/expected/resource_group.out | 6 +- .../expected/resource_group_cpuset.out | 8 +- .../regress/expected/resource_group_gucs.out | 36 - src/test/regress/regress_gp.c | 26 - src/test/regress/sql/resource_group.sql | 6 +- .../regress/sql/resource_group_cpuset.sql | 4 +- src/test/regress/sql/resource_group_gucs.sql | 20 - 133 files changed, 1087 insertions(+), 10457 deletions(-) create mode 100644 src/include/utils/linux-ops.h delete mode 100644 src/test/isolation2/expected/resgroup/resgroup_alter_memory_spill_ratio.out delete mode 100644 src/test/isolation2/expected/resgroup/resgroup_bypass_memory_limit.out delete mode 100644 src/test/isolation2/expected/resgroup/resgroup_memory_hashagg_spill.out delete mode 100644 src/test/isolation2/expected/resgroup/resgroup_memory_hashjoin_spill.out delete mode 100644 src/test/isolation2/expected/resgroup/resgroup_memory_materialize_spill.out delete mode 100644 src/test/isolation2/expected/resgroup/resgroup_memory_sisc_mat_sort.out delete mode 100644 src/test/isolation2/expected/resgroup/resgroup_memory_sisc_sort_spill.out delete mode 100644 src/test/isolation2/expected/resgroup/resgroup_memory_sort_spill.out delete mode 100644 src/test/isolation2/expected/resgroup/resgroup_memory_spilltodisk.out delete mode 100644 src/test/isolation2/expected/resgroup/resgroup_set_memory_spill_ratio.out delete mode 100644 src/test/isolation2/expected/resgroup/resgroup_unlimit_memory_spill_ratio.out delete mode 100644 src/test/isolation2/input/resgroup/resgroup_alter_memory.source delete mode 100644 src/test/isolation2/input/resgroup/resgroup_memory_limit.source delete mode 100644 src/test/isolation2/input/resgroup/resgroup_memory_runaway.source delete mode 100644 
src/test/isolation2/input/resgroup/resgroup_memory_statistic.source delete mode 100644 src/test/isolation2/sql/resgroup/resgroup_alter_memory_spill_ratio.sql delete mode 100644 src/test/isolation2/sql/resgroup/resgroup_bypass_memory_limit.sql delete mode 100644 src/test/isolation2/sql/resgroup/resgroup_memory_hashagg_spill.sql delete mode 100644 src/test/isolation2/sql/resgroup/resgroup_memory_hashjoin_spill.sql delete mode 100644 src/test/isolation2/sql/resgroup/resgroup_memory_materialize_spill.sql delete mode 100644 src/test/isolation2/sql/resgroup/resgroup_memory_sisc_mat_sort.sql delete mode 100644 src/test/isolation2/sql/resgroup/resgroup_memory_sisc_sort_spill.sql delete mode 100644 src/test/isolation2/sql/resgroup/resgroup_memory_sort_spill.sql delete mode 100644 src/test/isolation2/sql/resgroup/resgroup_memory_spilltodisk.sql delete mode 100644 src/test/isolation2/sql/resgroup/resgroup_operator_memory.sql delete mode 100644 src/test/isolation2/sql/resgroup/resgroup_query_mem.sql delete mode 100644 src/test/isolation2/sql/resgroup/resgroup_set_memory_spill_ratio.sql delete mode 100644 src/test/isolation2/sql/resgroup/resgroup_unlimit_memory_spill_ratio.sql diff --git a/doc/src/sgml/ref/alter_resource_group.sgml b/doc/src/sgml/ref/alter_resource_group.sgml index ca0433a09d6..e07113af6ad 100755 --- a/doc/src/sgml/ref/alter_resource_group.sgml +++ b/doc/src/sgml/ref/alter_resource_group.sgml @@ -25,11 +25,9 @@ PostgreSQL documentation ALTER RESOURCE GROUP name SET group_attribute value where group_attribute is one of: CONCURRENCY integer - CPU_RATE_LIMIT integer + CPU_HARD_QUOTA_LIMIT integer + CPU_SOFT_PRIORITY integer CPUSET tuple - MEMORY_LIMIT integer - MEMORY_SHARED_QUOTA integer - MEMORY_SPILL_RATIO integer diff --git a/doc/src/sgml/ref/create_resource_group.sgml b/doc/src/sgml/ref/create_resource_group.sgml index 2d10b034109..7b193e0deb3 100755 --- a/doc/src/sgml/ref/create_resource_group.sgml +++ b/doc/src/sgml/ref/create_resource_group.sgml @@ -23,12 
+23,9 @@ PostgreSQL documentation CREATE RESOURCE GROUP name WITH (group_attribute=value [, ... ]) where group_attribute is: - CPU_RATE_LIMIT=integer | CPUSET=tuple - MEMORY_LIMIT=integer + CPU_HARD_QUOTA_LIMIT=integer | CPUSET=tuple [ CONCURRENCY=integer ] - [ MEMORY_SHARED_QUOTA=integer ] - [ MEMORY_SPILL_RATIO=integer ] - [ MEMORY_AUDITOR= {vmtracker | cgroup} ] + [ CPU_SOFT_PRIORITY=integer ] diff --git a/src/backend/catalog/gp_toolkit.sql b/src/backend/catalog/gp_toolkit.sql index 5be9fe53bba..568edc9b517 100644 --- a/src/backend/catalog/gp_toolkit.sql +++ b/src/backend/catalog/gp_toolkit.sql @@ -1708,24 +1708,14 @@ CREATE VIEW gp_toolkit.gp_resgroup_config AS SELECT G.oid AS groupid , G.rsgname AS groupname , T1.value AS concurrency - , T2.value AS cpu_rate_limit - , T3.value AS memory_limit - , T4.value AS memory_shared_quota - , T5.value AS memory_spill_ratio - , CASE WHEN T6.value IS NULL THEN 'vmtracker' - WHEN T6.value='0' THEN 'vmtracker' - WHEN T6.value='1' THEN 'cgroup' - ELSE 'unknown' - END AS memory_auditor - , T7.value AS cpuset + , T2.value AS cpu_hard_quota_limit + , T3.value AS cpu_soft_priority + , T4.value AS cpuset FROM pg_resgroup G JOIN pg_resgroupcapability T1 ON G.oid = T1.resgroupid AND T1.reslimittype = 1 JOIN pg_resgroupcapability T2 ON G.oid = T2.resgroupid AND T2.reslimittype = 2 JOIN pg_resgroupcapability T3 ON G.oid = T3.resgroupid AND T3.reslimittype = 3 - JOIN pg_resgroupcapability T4 ON G.oid = T4.resgroupid AND T4.reslimittype = 4 - JOIN pg_resgroupcapability T5 ON G.oid = T5.resgroupid AND T5.reslimittype = 5 - LEFT JOIN pg_resgroupcapability T6 ON G.oid = T6.resgroupid AND T6.reslimittype = 6 - LEFT JOIN pg_resgroupcapability T7 ON G.oid = T7.resgroupid AND T7.reslimittype = 7 + LEFT JOIN pg_resgroupcapability T4 ON G.oid = T4.resgroupid AND T4.reslimittype = 4 ; GRANT SELECT ON gp_toolkit.gp_resgroup_config TO public; @@ -1763,7 +1753,6 @@ CREATE VIEW gp_toolkit.gp_resgroup_status_per_host AS , groupid , 
(json_each(cpu_usage)).key::smallint AS segment_id , (json_each(cpu_usage)).value AS cpu - , (json_each(memory_usage)).value AS memory FROM gp_toolkit.gp_resgroup_status ) SELECT @@ -1771,12 +1760,6 @@ CREATE VIEW gp_toolkit.gp_resgroup_status_per_host AS , s.groupid , c.hostname , round(avg((s.cpu)::text::numeric), 2) AS cpu - , sum((s.memory->'used' )::text::integer) AS memory_used - , sum((s.memory->'available' )::text::integer) AS memory_available - , sum((s.memory->'quota_used' )::text::integer) AS memory_quota_used - , sum((s.memory->'quota_available' )::text::integer) AS memory_quota_available - , sum((s.memory->'shared_used' )::text::integer) AS memory_shared_used - , sum((s.memory->'shared_available')::text::integer) AS memory_shared_available FROM s INNER JOIN pg_catalog.gp_segment_configuration AS c ON s.segment_id = c.content @@ -1805,7 +1788,6 @@ CREATE VIEW gp_toolkit.gp_resgroup_status_per_segment AS , groupid , (json_each(cpu_usage)).key::smallint AS segment_id , (json_each(cpu_usage)).value AS cpu - , (json_each(memory_usage)).value AS memory FROM gp_toolkit.gp_resgroup_status ) SELECT @@ -1813,13 +1795,7 @@ CREATE VIEW gp_toolkit.gp_resgroup_status_per_segment AS , s.groupid , c.hostname , s.segment_id - , sum((s.cpu )::text::numeric) AS cpu - , sum((s.memory->'used' )::text::integer) AS memory_used - , sum((s.memory->'available' )::text::integer) AS memory_available - , sum((s.memory->'quota_used' )::text::integer) AS memory_quota_used - , sum((s.memory->'quota_available' )::text::integer) AS memory_quota_available - , sum((s.memory->'shared_used' )::text::integer) AS memory_shared_used - , sum((s.memory->'shared_available')::text::integer) AS memory_shared_available + , sum((s.cpu)::text::numeric) AS cpu FROM s INNER JOIN pg_catalog.gp_segment_configuration AS c ON s.segment_id = c.content diff --git a/src/backend/cdb/dispatcher/cdbdisp_query.c b/src/backend/cdb/dispatcher/cdbdisp_query.c index 3d55f03f3dc..c920ec58ae6 100644 --- 
a/src/backend/cdb/dispatcher/cdbdisp_query.c +++ b/src/backend/cdb/dispatcher/cdbdisp_query.c @@ -36,7 +36,6 @@ #include "utils/faultinjector.h" #include "utils/resgroup.h" #include "utils/resource_manager.h" -#include "utils/cgroup.h" #include "utils/session_state.h" #include "utils/typcache.h" #include "miscadmin.h" @@ -267,21 +266,6 @@ CdbDispatchPlan(struct QueryDesc *queryDesc, verify_shared_snapshot_ready(gp_command_count); } - /* In the final stage, add the resource information needed for QE by the resource group */ - stmt->total_memory_coordinator = 0; - stmt->nsegments_coordinator = 0; - - if (IsResGroupEnabled() && gp_resource_group_enable_recalculate_query_mem && - memory_spill_ratio != RESGROUP_FALLBACK_MEMORY_SPILL_RATIO) - { - /* - * We enable resource group re-calculate the query_mem on QE, and we are not in - * fall back mode (use statement_mem). - */ - stmt->total_memory_coordinator = cgroupOpsRoutine->gettotalmemory(); - stmt->nsegments_coordinator = ResGroupGetHostPrimaryCount(); - } - cdbdisp_dispatchX(queryDesc, planRequiresTxn, cancelOnError); } diff --git a/src/backend/commands/resgroupcmds.c b/src/backend/commands/resgroupcmds.c index dc1a4b390ac..18efac040b5 100644 --- a/src/backend/commands/resgroupcmds.c +++ b/src/backend/commands/resgroupcmds.c @@ -45,43 +45,21 @@ #include "catalog/gp_indexing.h" #define RESGROUP_DEFAULT_CONCURRENCY (20) -#define RESGROUP_DEFAULT_MEM_SHARED_QUOTA (80) -#define RESGROUP_DEFAULT_MEM_SPILL_RATIO RESGROUP_FALLBACK_MEMORY_SPILL_RATIO -#define RESGROUP_DEFAULT_MEMORY_LIMIT RESGROUP_UNLIMITED_MEMORY_LIMIT - -#define RESGROUP_DEFAULT_MEM_AUDITOR (RESGROUP_MEMORY_AUDITOR_VMTRACKER) -#define RESGROUP_INVALID_MEM_AUDITOR (-1) +#define RESGROUP_DEFAULT_CPU_SOFT_PRIORITY (100) #define RESGROUP_MIN_CONCURRENCY (0) #define RESGROUP_MAX_CONCURRENCY (MaxConnections) -#define RESGROUP_MIN_CPU_RATE_LIMIT (1) -#define RESGROUP_MAX_CPU_RATE_LIMIT (100) - -#define RESGROUP_MIN_MEMORY_LIMIT (0) -#define 
RESGROUP_MAX_MEMORY_LIMIT (100) - -#define RESGROUP_MIN_MEMORY_SHARED_QUOTA (0) -#define RESGROUP_MAX_MEMORY_SHARED_QUOTA (100) - -#define RESGROUP_MIN_MEMORY_SPILL_RATIO (0) -#define RESGROUP_MAX_MEMORY_SPILL_RATIO (100) +#define RESGROUP_MAX_CPU_HARD_QUOTA_LIMIT (100) +#define RESGROUP_MIN_CPU_HARD_QUOTA_LIMIT (1) -/* - * The names must be in the same order as ResGroupMemAuditorType. - */ -static const char *ResGroupMemAuditorName[] = -{ - "vmtracker", // RESGROUP_MEMORY_AUDITOR_VMTRACKER - "cgroup" // RESGROUP_MEMORY_AUDITOR_CGROUP -}; +#define RESGROUP_MIN_CPU_SOFT_PRIORITY (1) static int str2Int(const char *str, const char *prop); static ResGroupLimitType getResgroupOptionType(const char* defname); static ResGroupCap getResgroupOptionValue(DefElem *defel, int type); static const char *getResgroupOptionName(ResGroupLimitType type); static void checkResgroupCapLimit(ResGroupLimitType type, ResGroupCap value); -static void checkResgroupCapConflicts(ResGroupCaps *caps); static void parseStmtOptions(CreateResourceGroupStmt *stmt, ResGroupCaps *caps); static void validateCapabilities(Relation rel, Oid groupid, ResGroupCaps *caps, bool newGroup); static void insertResgroupCapabilityEntry(Relation rel, Oid groupid, uint16 type, const char *value); @@ -96,9 +74,9 @@ static void checkAuthIdForDrop(Oid groupId); static void createResgroupCallback(XactEvent event, void *arg); static void dropResgroupCallback(XactEvent event, void *arg); static void alterResgroupCallback(XactEvent event, void *arg); -static int getResGroupMemAuditor(char *name); static void checkCpusetSyntax(const char *cpuset); + /* * CREATE RESOURCE GROUP */ @@ -124,8 +102,7 @@ CreateResourceGroup(CreateResourceGroupStmt *stmt) errmsg("must be superuser to create resource groups"))); /* - * Check for an illegal name ('none' is used to signify no group in ALTER - * ROLE). + * Check for an illegal name ('none' is used to signify no group in ALTER ROLE). 
*/ if (strcmp(stmt->name, "none") == 0) ereport(ERROR, @@ -136,15 +113,13 @@ CreateResourceGroup(CreateResourceGroupStmt *stmt) parseStmtOptions(stmt, &caps); /* - * both CREATE and ALTER resource group need check the sum of cpu_rate_limit - * and memory_limit and make sure the sum don't exceed 100. To make it simple, - * acquire ExclusiveLock lock on pg_resgroupcapability at the beginning - * of CREATE and ALTER + * Both CREATE and ALTER resource group need check the intersection of cpuset, + * to make it simple, acquire ExclusiveLock lock on pg_resgroupcapability at + * the beginning of CREATE and ALTER. */ pg_resgroupcapability_rel = table_open(ResGroupCapabilityRelationId, ExclusiveLock); pg_resgroup_rel = table_open(ResGroupRelationId, RowExclusiveLock); - /* Check if MaxResourceGroups limit is reached */ sscan = systable_beginscan(pg_resgroup_rel, ResGroupRsgnameIndexId, false, NULL, 0, NULL); nResGroups = 0; @@ -152,6 +127,7 @@ CreateResourceGroup(CreateResourceGroupStmt *stmt) nResGroups++; systable_endscan(sscan); + /* Check if MaxResourceGroups limit is reached */ if (nResGroups >= MaxResourceGroups) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_RESOURCES), @@ -240,13 +216,12 @@ CreateResourceGroup(CreateResourceGroupStmt *stmt) /* Create os dependent part for this resource group */ cgroupOpsRoutine->createcgroup(groupid); - cgroupOpsRoutine->setmemorylimit(groupid, caps.memLimit); - - if (caps.cpuRateLimit != CPU_RATE_LIMIT_DISABLED) + if (CpusetIsEmpty(caps.cpuset)) { - cgroupOpsRoutine->setcpulimit(groupid, caps.cpuRateLimit); + cgroupOpsRoutine->setcpulimit(groupid, caps.cpuHardQuotaLimit); + cgroupOpsRoutine->setcpupriority(groupid, caps.cpuSoftPriority); } - else if (!CpusetIsEmpty(caps.cpuset)) + else { EnsureCpusetIsAvailable(ERROR); @@ -428,17 +403,16 @@ AlterResourceGroup(AlterResourceGroupStmt *stmt) } /* - * In validateCapabilities() we scan all the resource groups - * to check whether the total cpu_rate_limit exceed 100 or not. 
- * We use ExclusiveLock here to prevent concurrent - * increase on different resource group. - * We can't use AccessExclusiveLock here, the reason is that, - * if there is a database recovery happened when run "alter resource group" - * and acquire this kind of lock, the initialization of resource group - * in function InitResGroups will be pending during database startup, - * since this function will open this table with AccessShareLock, - * AccessExclusiveLock is not compatible with any other lock. - * ExclusiveLock and AccessShareLock are compatible. + * We use ExclusiveLock here to prevent concurrent increase on different + * resource group. + * + * We can't use AccessExclusiveLock here, the reason is that, if there is + * a database recovery happened when run "alter resource group" and acquire + * this kind of lock, the initialization of resource group in function + * InitResGroups will be pending during database startup, since this function + * will open this table with AccessShareLock, AccessExclusiveLock is not + * compatible with any other lock. ExclusiveLock and AccessShareLock are + * compatible. 
*/ pg_resgroupcapability_rel = heap_open(ResGroupCapabilityRelationId, ExclusiveLock); @@ -450,44 +424,38 @@ AlterResourceGroup(AlterResourceGroupStmt *stmt) switch (limitType) { case RESGROUP_LIMIT_TYPE_CPU: - caps.cpuRateLimit = value; + caps.cpuHardQuotaLimit = value; SetCpusetEmpty(caps.cpuset, sizeof(caps.cpuset)); break; - case RESGROUP_LIMIT_TYPE_MEMORY: - caps.memLimit = value; + case RESGROUP_LIMIT_TYPE_CPU_SHARES: + caps.cpuSoftPriority = value; break; case RESGROUP_LIMIT_TYPE_CONCURRENCY: caps.concurrency = value; break; - case RESGROUP_LIMIT_TYPE_MEMORY_SHARED_QUOTA: - caps.memSharedQuota = value; - break; - case RESGROUP_LIMIT_TYPE_MEMORY_SPILL_RATIO: - caps.memSpillRatio = value; - break; - case RESGROUP_LIMIT_TYPE_MEMORY_AUDITOR: - caps.memAuditor = value; - break; case RESGROUP_LIMIT_TYPE_CPUSET: strlcpy(caps.cpuset, cpuset, sizeof(caps.cpuset)); - caps.cpuRateLimit = CPU_RATE_LIMIT_DISABLED; + caps.cpuHardQuotaLimit = CPU_HARD_QUOTA_LIMIT_DISABLED; + caps.cpuSoftPriority = RESGROUP_DEFAULT_CPU_SOFT_PRIORITY; break; default: break; } - checkResgroupCapConflicts(&caps); - validateCapabilities(pg_resgroupcapability_rel, groupid, &caps, false); - /* cpuset & cpu_rate_limit can not coexist - * if cpuset is active, then cpu_rate_limit must set to CPU_RATE_LIMIT_DISABLED - * if cpu_rate_limit is active, then cpuset must set to "" */ + /* cpuset & cpu_hard_quota_limit can not coexist. 
+ * if cpuset is active, then cpu_hard_quota_limit must set to CPU_RATE_LIMIT_DISABLED, + * if cpu_hard_quota_limit is active, then cpuset must set to "" */ if (limitType == RESGROUP_LIMIT_TYPE_CPUSET) { updateResgroupCapabilityEntry(pg_resgroupcapability_rel, - groupid, RESGROUP_LIMIT_TYPE_CPU, - CPU_RATE_LIMIT_DISABLED, ""); + groupid, RESGROUP_LIMIT_TYPE_CPU, + CPU_HARD_QUOTA_LIMIT_DISABLED, ""); + updateResgroupCapabilityEntry(pg_resgroupcapability_rel, + groupid, RESGROUP_LIMIT_TYPE_CPU_SHARES, + RESGROUP_DEFAULT_CPU_SOFT_PRIORITY, ""); + updateResgroupCapabilityEntry(pg_resgroupcapability_rel, groupid, RESGROUP_LIMIT_TYPE_CPUSET, 0, caps.cpuset); @@ -599,24 +567,12 @@ GetResGroupCapabilities(Relation rel, Oid groupId, ResGroupCaps *resgroupCaps) getResgroupOptionName(type)); break; case RESGROUP_LIMIT_TYPE_CPU: - resgroupCaps->cpuRateLimit = str2Int(value, - getResgroupOptionName(type)); + resgroupCaps->cpuHardQuotaLimit = str2Int(value, + getResgroupOptionName(type)); break; - case RESGROUP_LIMIT_TYPE_MEMORY: - resgroupCaps->memLimit = str2Int(value, - getResgroupOptionName(type)); - break; - case RESGROUP_LIMIT_TYPE_MEMORY_SHARED_QUOTA: - resgroupCaps->memSharedQuota = str2Int(value, - getResgroupOptionName(type)); - break; - case RESGROUP_LIMIT_TYPE_MEMORY_SPILL_RATIO: - resgroupCaps->memSpillRatio = str2Int(value, - getResgroupOptionName(type)); - break; - case RESGROUP_LIMIT_TYPE_MEMORY_AUDITOR: - resgroupCaps->memAuditor = str2Int(value, - getResgroupOptionName(type)); + case RESGROUP_LIMIT_TYPE_CPU_SHARES: + resgroupCaps->cpuSoftPriority = str2Int(value, + getResgroupOptionName(type)); break; case RESGROUP_LIMIT_TYPE_CPUSET: strlcpy(resgroupCaps->cpuset, value, sizeof(resgroupCaps->cpuset)); @@ -771,11 +727,6 @@ ResGroupCheckForRole(Oid groupId) /* Load current resource group capabilities */ GetResGroupCapabilities(pg_resgroupcapability_rel, groupId, &caps); - if (caps.memAuditor == RESGROUP_MEMORY_AUDITOR_CGROUP) - ereport(ERROR, - 
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("you cannot assign a role to this resource group"), - errdetail("The memory_auditor property for this group is not the default."))); heap_close(pg_resgroupcapability_rel, AccessShareLock); } @@ -790,20 +741,14 @@ ResGroupCheckForRole(Oid groupId) static ResGroupLimitType getResgroupOptionType(const char* defname) { - if (strcmp(defname, "cpu_rate_limit") == 0) + if (strcmp(defname, "cpu_hard_quota_limit") == 0) return RESGROUP_LIMIT_TYPE_CPU; - else if (strcmp(defname, "memory_limit") == 0) - return RESGROUP_LIMIT_TYPE_MEMORY; else if (strcmp(defname, "concurrency") == 0) return RESGROUP_LIMIT_TYPE_CONCURRENCY; - else if (strcmp(defname, "memory_shared_quota") == 0) - return RESGROUP_LIMIT_TYPE_MEMORY_SHARED_QUOTA; - else if (strcmp(defname, "memory_spill_ratio") == 0) - return RESGROUP_LIMIT_TYPE_MEMORY_SPILL_RATIO; - else if (strcmp(defname, "memory_auditor") == 0) - return RESGROUP_LIMIT_TYPE_MEMORY_AUDITOR; else if (strcmp(defname, "cpuset") == 0) return RESGROUP_LIMIT_TYPE_CPUSET; + else if (strcmp(defname, "cpu_soft_priority") == 0) + return RESGROUP_LIMIT_TYPE_CPU_SHARES; else return RESGROUP_LIMIT_TYPE_UNKNOWN; } @@ -816,15 +761,7 @@ getResgroupOptionValue(DefElem *defel, int type) { int64 value; - if (type == RESGROUP_LIMIT_TYPE_MEMORY_AUDITOR) - { - char *auditor_name = defGetString(defel); - value = getResGroupMemAuditor(auditor_name); - } - else - { - value = defGetInt64(defel); - } + value = defGetInt64(defel); if (value < INT_MIN || value > INT_MAX) ereport(ERROR, @@ -848,17 +785,11 @@ getResgroupOptionName(ResGroupLimitType type) case RESGROUP_LIMIT_TYPE_CONCURRENCY: return "concurrency"; case RESGROUP_LIMIT_TYPE_CPU: - return "cpu_rate_limit"; - case RESGROUP_LIMIT_TYPE_MEMORY: - return "memory_limit"; - case RESGROUP_LIMIT_TYPE_MEMORY_SHARED_QUOTA: - return "memory_shared_quota"; - case RESGROUP_LIMIT_TYPE_MEMORY_SPILL_RATIO: - return "memory_spill_ratio"; - case 
RESGROUP_LIMIT_TYPE_MEMORY_AUDITOR: - return "memory_auditor"; + return "cpu_hard_quota_limit"; case RESGROUP_LIMIT_TYPE_CPUSET: return "cpuset"; + case RESGROUP_LIMIT_TYPE_CPU_SHARES: + return "cpu_soft_priority"; default: return "unknown"; } @@ -882,53 +813,21 @@ checkResgroupCapLimit(ResGroupLimitType type, int value) break; case RESGROUP_LIMIT_TYPE_CPU: - if (value < RESGROUP_MIN_CPU_RATE_LIMIT || - value > RESGROUP_MAX_CPU_RATE_LIMIT) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("cpu_rate_limit range is [%d, %d]", - RESGROUP_MIN_CPU_RATE_LIMIT, - RESGROUP_MAX_CPU_RATE_LIMIT))); - break; - - case RESGROUP_LIMIT_TYPE_MEMORY: - if (value < RESGROUP_MIN_MEMORY_LIMIT || - value > RESGROUP_MAX_MEMORY_LIMIT) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("memory_limit range is [%d, %d]", - RESGROUP_MIN_MEMORY_LIMIT, - RESGROUP_MAX_MEMORY_LIMIT))); - break; - - case RESGROUP_LIMIT_TYPE_MEMORY_SHARED_QUOTA: - if (value < RESGROUP_MIN_MEMORY_SHARED_QUOTA || - value > RESGROUP_MAX_MEMORY_SHARED_QUOTA) + if (value > RESGROUP_MAX_CPU_HARD_QUOTA_LIMIT || + (value < RESGROUP_MIN_CPU_HARD_QUOTA_LIMIT && value != CPU_HARD_QUOTA_LIMIT_DISABLED)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("memory_shared_quota range is [%d, %d]", - RESGROUP_MIN_MEMORY_SHARED_QUOTA, - RESGROUP_MAX_MEMORY_SHARED_QUOTA))); + errmsg("cpu_hard_quota_limit range is [%d, %d] or equals to %d", + RESGROUP_MIN_CPU_HARD_QUOTA_LIMIT, RESGROUP_MAX_CPU_HARD_QUOTA_LIMIT, + CPU_HARD_QUOTA_LIMIT_DISABLED))); break; - case RESGROUP_LIMIT_TYPE_MEMORY_SPILL_RATIO: - if (value < RESGROUP_MIN_MEMORY_SPILL_RATIO || - value > RESGROUP_MAX_MEMORY_SPILL_RATIO) + case RESGROUP_LIMIT_TYPE_CPU_SHARES: + if (value < RESGROUP_MIN_CPU_SOFT_PRIORITY) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("memory_spill_ratio range is [%d, %d]", - RESGROUP_MIN_MEMORY_SPILL_RATIO, - RESGROUP_MAX_MEMORY_SPILL_RATIO))); - break; - - case 
RESGROUP_LIMIT_TYPE_MEMORY_AUDITOR: - if (value != RESGROUP_MEMORY_AUDITOR_VMTRACKER && - value != RESGROUP_MEMORY_AUDITOR_CGROUP) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("memory_auditor should be \"%s\" or \"%s\"", - ResGroupMemAuditorName[RESGROUP_MEMORY_AUDITOR_VMTRACKER], - ResGroupMemAuditorName[RESGROUP_MEMORY_AUDITOR_CGROUP]))); + errmsg("cpu_soft_priority range is [%d, +∞]", + RESGROUP_MIN_CPU_SOFT_PRIORITY))); break; default: @@ -937,48 +836,6 @@ checkResgroupCapLimit(ResGroupLimitType type, int value) } } -/* - * Check conflict settings in caps. - */ -static void -checkResgroupCapConflicts(ResGroupCaps *caps) -{ - /* - * When memory_limit is unlimited the memory_spill_ratio must be set to - * 'fallback' mode to use the statement_mem. - */ - if (caps->memLimit == RESGROUP_UNLIMITED_MEMORY_LIMIT && - caps->memSpillRatio != RESGROUP_FALLBACK_MEMORY_SPILL_RATIO) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("when memory_limit is unlimited memory_spill_ratio must be set to %d", - RESGROUP_FALLBACK_MEMORY_SPILL_RATIO))); - - /* - * When memory_auditor is cgroup the concurrency must be 0. - */ - if (caps->memAuditor == RESGROUP_MEMORY_AUDITOR_CGROUP && - caps->concurrency != 0) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("resource group concurrency must be 0 when group memory_auditor is %s", - ResGroupMemAuditorName[RESGROUP_MEMORY_AUDITOR_CGROUP]))); - - /* - * The cgroup memory_auditor should not be used without a properly - * configured cgroup memory directory. 
- */ - if (caps->memAuditor == RESGROUP_MEMORY_AUDITOR_CGROUP && - !gp_resource_group_enable_cgroup_memory) - { - ereport(ERROR, - (errcode(ERRCODE_GP_FEATURE_NOT_CONFIGURED), - errmsg("cgroup is not properly configured for the 'cgroup' memory auditor"), - errhint("Extra cgroup configurations are required to enable this feature, " - "please refer to the Cloudberry Documentation for details"))); - } -} - /* * Parse a statement and store the settings in options. * @@ -1014,8 +871,10 @@ parseStmtOptions(CreateResourceGroupStmt *stmt, ResGroupCaps *caps) { const char *cpuset = defGetString(defel); strlcpy(caps->cpuset, cpuset, sizeof(caps->cpuset)); - checkCpuSetByRole(cpuset); - caps->cpuRateLimit = CPU_RATE_LIMIT_DISABLED; + checkCpuSetByRole(cpuset); + caps->cpuHardQuotaLimit = CPU_HARD_QUOTA_LIMIT_DISABLED; + caps->cpuSoftPriority = RESGROUP_DEFAULT_CPU_SOFT_PRIORITY; + } else { @@ -1028,20 +887,11 @@ parseStmtOptions(CreateResourceGroupStmt *stmt, ResGroupCaps *caps) caps->concurrency = value; break; case RESGROUP_LIMIT_TYPE_CPU: - caps->cpuRateLimit = value; + caps->cpuHardQuotaLimit = value; SetCpusetEmpty(caps->cpuset, sizeof(caps->cpuset)); break; - case RESGROUP_LIMIT_TYPE_MEMORY: - caps->memLimit = value; - break; - case RESGROUP_LIMIT_TYPE_MEMORY_SHARED_QUOTA: - caps->memSharedQuota = value; - break; - case RESGROUP_LIMIT_TYPE_MEMORY_SPILL_RATIO: - caps->memSpillRatio = value; - break; - case RESGROUP_LIMIT_TYPE_MEMORY_AUDITOR: - caps->memAuditor = value; + case RESGROUP_LIMIT_TYPE_CPU_SHARES: + caps->cpuSoftPriority = value; break; default: break; @@ -1056,30 +906,20 @@ parseStmtOptions(CreateResourceGroupStmt *stmt, ResGroupCaps *caps) (mask & (1 << RESGROUP_LIMIT_TYPE_CPUSET))) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("can't specify both cpu_rate_limit and cpuset"))); + errmsg("can't specify both cpu_hard_quota_limit and cpuset"))); if (!(mask & (1 << RESGROUP_LIMIT_TYPE_CPU)) && !(mask & (1 << RESGROUP_LIMIT_TYPE_CPUSET))) 
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("must specify cpu_rate_limit or cpuset"))); + errmsg("must specify cpu_hard_quota_limit or cpuset"))); if (!(mask & (1 << RESGROUP_LIMIT_TYPE_CONCURRENCY))) caps->concurrency = RESGROUP_DEFAULT_CONCURRENCY; - if (!(mask & (1 << RESGROUP_LIMIT_TYPE_MEMORY))) - caps->memLimit = RESGROUP_DEFAULT_MEMORY_LIMIT; - - if (!(mask & (1 << RESGROUP_LIMIT_TYPE_MEMORY_SHARED_QUOTA))) - caps->memSharedQuota = RESGROUP_DEFAULT_MEM_SHARED_QUOTA; - - if (!(mask & (1 << RESGROUP_LIMIT_TYPE_MEMORY_SPILL_RATIO))) - caps->memSpillRatio = RESGROUP_DEFAULT_MEM_SPILL_RATIO; - - if (!(mask & (1 << RESGROUP_LIMIT_TYPE_MEMORY_AUDITOR))) - caps->memAuditor = RESGROUP_DEFAULT_MEM_AUDITOR; - - checkResgroupCapConflicts(caps); + if ((mask & (1 << RESGROUP_LIMIT_TYPE_CPU)) && + !(mask & (1 << RESGROUP_LIMIT_TYPE_CPU_SHARES))) + caps->cpuSoftPriority = RESGROUP_DEFAULT_CPU_SOFT_PRIORITY; } /* @@ -1155,25 +995,13 @@ insertResgroupCapabilities(Relation rel, Oid groupId, ResGroupCaps *caps) insertResgroupCapabilityEntry(rel, groupId, RESGROUP_LIMIT_TYPE_CONCURRENCY, value); - snprintf(value, sizeof(value), "%d", caps->cpuRateLimit); + snprintf(value, sizeof(value), "%d", caps->cpuHardQuotaLimit); insertResgroupCapabilityEntry(rel, groupId, RESGROUP_LIMIT_TYPE_CPU, value); - snprintf(value, sizeof(value), "%d", caps->memLimit); - insertResgroupCapabilityEntry(rel, groupId, - RESGROUP_LIMIT_TYPE_MEMORY, value); - - snprintf(value, sizeof(value), "%d", caps->memSharedQuota); + snprintf(value, sizeof(value), "%d", caps->cpuSoftPriority); insertResgroupCapabilityEntry(rel, groupId, - RESGROUP_LIMIT_TYPE_MEMORY_SHARED_QUOTA, value); - - snprintf(value, sizeof(value), "%d", caps->memSpillRatio); - insertResgroupCapabilityEntry(rel, groupId, - RESGROUP_LIMIT_TYPE_MEMORY_SPILL_RATIO, value); - - snprintf(value, sizeof(value), "%d", caps->memAuditor); - insertResgroupCapabilityEntry(rel, groupId, - RESGROUP_LIMIT_TYPE_MEMORY_AUDITOR, value); + 
RESGROUP_LIMIT_TYPE_CPU_SHARES, value); insertResgroupCapabilityEntry(rel, groupId, RESGROUP_LIMIT_TYPE_CPUSET, caps->cpuset); @@ -1275,8 +1103,6 @@ validateCapabilities(Relation rel, { HeapTuple tuple; SysScanDesc sscan; - int totalCpu = caps->cpuRateLimit; - int totalMem = caps->memLimit; char cpusetAll[MaxCpuSetLength] = {0}; char cpusetMissing[MaxCpuSetLength] = {0}; Bitmapset *bmsCurrent = NULL; @@ -1334,7 +1160,6 @@ validateCapabilities(Relation rel, ResGroupLimitType reslimittype; Oid resgroupid; char *valueStr; - int value; bool isNull; groupIdDatum = heap_getattr(tuple, Anum_pg_resgroupcapability_resgroupid, @@ -1359,32 +1184,8 @@ validateCapabilities(Relation rel, valueDatum = heap_getattr(tuple, Anum_pg_resgroupcapability_value, rel->rd_att, &isNull); - if (reslimittype == RESGROUP_LIMIT_TYPE_CPU) - { - valueStr = TextDatumGetCString(valueDatum); - value = str2Int(valueStr, getResgroupOptionName(reslimittype)); - if (value != CPU_RATE_LIMIT_DISABLED) - { - totalCpu += value; - if (totalCpu > RESGROUP_MAX_CPU_RATE_LIMIT) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("total cpu_rate_limit exceeded the limit of %d", - RESGROUP_MAX_CPU_RATE_LIMIT))); - } - } - else if (reslimittype == RESGROUP_LIMIT_TYPE_MEMORY) - { - valueStr = TextDatumGetCString(valueDatum); - value = str2Int(valueStr, getResgroupOptionName(reslimittype)); - totalMem += value; - if (totalMem > RESGROUP_MAX_MEMORY_LIMIT) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("total memory_limit exceeded the limit of %d", - RESGROUP_MAX_MEMORY_LIMIT))); - } - else if (reslimittype == RESGROUP_LIMIT_TYPE_CPUSET) + /* we need to check the configuration of cpuset for intersection. */ + if (reslimittype == RESGROUP_LIMIT_TYPE_CPUSET) { /* * do the check when resource group is activated @@ -1532,23 +1333,6 @@ str2Int(const char *str, const char *prop) return floor(val); } -/* - * Get memory auditor from auditor name. 
- */ -static int -getResGroupMemAuditor(char *name) -{ - int index; - - for (index = 0; index < RESGROUP_MEMORY_AUDITOR_COUNT; index ++) - { - if (strcmp(ResGroupMemAuditorName[index], name) == 0) - return index; - } - - return RESGROUP_INVALID_MEM_AUDITOR; -} - /* * check whether the cpuset value is syntactically right */ diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 0fb1704e04c..e6713aa20aa 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -233,75 +233,8 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags) elog(GP_RESMANAGER_MEMORY_LOG_LEVEL, "query requested %.0fKB of memory", (double) queryDesc->plannedstmt->query_mem / 1024.0); } - } - - /** - * Distribute memory to operators. - * - * There are some statements that do not go through the resource queue, so we cannot - * put in a strong assert here. Someday, we should fix resource queues. - */ - if (queryDesc->plannedstmt->query_mem > 0) - { - /* - * Whether we should skip operator memory assignment - * - We should never skip operator memory assignment on QD. - * - On QE, not skip in case of resource group enabled, and customer allow QE re-calculate query_mem, - * as the GUC `gp_resource_group_enable_recalculate_query_mem` set to on. - */ - bool should_skip_operator_memory_assign = true; - - if (Gp_role == GP_ROLE_EXECUTE) - { - /* - * If resource group is enabled, we should re-calculate query_mem on QE, because the memory - * of the coordinator and segment nodes or the number of instance could be different. - * - * On QE, we only try to recalculate query_mem if resource group enabled. 
Otherwise, we will skip this - * and the next operator memory assignment if resource queue enabled - */ - if (IsResGroupEnabled()) - { - int32 total_memory_coordinator = queryDesc->plannedstmt->total_memory_coordinator; - int nsegments_coordinator = queryDesc->plannedstmt->nsegments_coordinator; - - /* - * memSpill is not in fallback mode, and we enable resource group re-calculate the query_mem on QE, - * then re-calculate the query_mem and re-compute operatorMemKB using this new value - */ - if (total_memory_coordinator != 0 && nsegments_coordinator != 0) - { - should_skip_operator_memory_assign = false; - - /* Get total system memory on the QE in MB */ - int total_memory_segment = cgroupOpsRoutine->gettotalmemory(); - int nsegments_segment = ResGroupGetHostPrimaryCount(); - uint64 coordinator_query_mem = queryDesc->plannedstmt->query_mem; - - /* - * In the resource group environment, when we calculate query_mem, we can roughly use the following - * formula: - * - * query_mem = (total_memory * gp_resource_group_memory_limit * memory_limit / nsegments) * memory_spill_ratio / concurrency - * - * Only total_memory and nsegments could differ between QD and QE, so query_mem is proportional to - * the system's available virtual memory and inversely proportional to the number of instances. 
- */ - queryDesc->plannedstmt->query_mem *= (total_memory_segment * 1.0 / nsegments_segment) / - (total_memory_coordinator * 1.0 / nsegments_coordinator); - - elog(DEBUG1, "re-calculate query_mem, original QD's query_mem: %.0fKB, after recalculation QE's query_mem: %.0fKB", - (double) coordinator_query_mem / 1024.0 , (double) queryDesc->plannedstmt->query_mem / 1024.0); - } - } - } - else - { - /* On QD, we always traverse the plan tree and compute operatorMemKB */ - should_skip_operator_memory_assign = false; - } - if (!should_skip_operator_memory_assign) + if (queryDesc->plannedstmt->query_mem > 0) { PG_TRY(); { @@ -309,11 +242,11 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags) { case RESMANAGER_MEMORY_POLICY_AUTO: PolicyAutoAssignOperatorMemoryKB(queryDesc->plannedstmt, - queryDesc->plannedstmt->query_mem); + queryDesc->plannedstmt->query_mem); break; case RESMANAGER_MEMORY_POLICY_EAGER_FREE: PolicyEagerFreeAssignOperatorMemoryKB(queryDesc->plannedstmt, - queryDesc->plannedstmt->query_mem); + queryDesc->plannedstmt->query_mem); break; default: Assert(IsResManagerMemoryPolicyNone()); diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 2ab5f2b6d85..7fe3e7d0836 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -148,9 +148,6 @@ _copyPlannedStmt(const PlannedStmt *from) COPY_SCALAR_FIELD(query_mem); - COPY_SCALAR_FIELD(total_memory_coordinator); - COPY_SCALAR_FIELD(nsegments_coordinator); - COPY_NODE_FIELD(intoClause); COPY_NODE_FIELD(copyIntoClause); COPY_NODE_FIELD(refreshClause); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 2aabc00c5bd..d562b9c0397 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -381,9 +381,6 @@ _outPlannedStmt(StringInfo str, const PlannedStmt *node) WRITE_UINT64_FIELD(query_mem); - WRITE_UINT_FIELD(total_memory_coordinator); - WRITE_INT_FIELD(nsegments_coordinator); - WRITE_NODE_FIELD(intoClause); 
WRITE_NODE_FIELD(copyIntoClause); WRITE_NODE_FIELD(refreshClause); diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 96433fa186d..7c73b544df1 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1659,9 +1659,6 @@ _readPlannedStmt(void) READ_UINT64_FIELD(query_mem); - READ_UINT_FIELD(total_memory_coordinator); - READ_INT_FIELD(nsegments_coordinator); - READ_NODE_FIELD(intoClause); READ_NODE_FIELD(copyIntoClause); READ_NODE_FIELD(refreshClause); diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 0f9f3fac527..7ebc37b1436 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -788,7 +788,7 @@ static void check_expressions_in_partition_key(PartitionSpec *spec, core_yyscan_ LEADING LEAKPROOF LEAST LEFT LEVEL LIKE LIMIT LISTEN LOAD LOCAL LOCALTIME LOCALTIMESTAMP LOCATION LOCK_P LOCKED LOCUS LOGGED - MAPPING MATCH MATERIALIZED MAXVALUE MEMORY_LIMIT MEMORY_SHARED_QUOTA MEMORY_SPILL_RATIO + MAPPING MATCH MATERIALIZED MAXVALUE MEMORY_LIMIT METHOD MINUTE_P MINVALUE MODE MONTH_P MOVE NAME_P NAMES NATIONAL NATURAL NCHAR NEW NEXT NFC NFD NFKC NFKD NO NONE @@ -842,7 +842,7 @@ static void check_expressions_in_partition_key(PartitionSpec *spec, core_yyscan_ %token ACCOUNT ACTIVE - CONTAINS COORDINATOR CPUSET CPU_RATE_LIMIT + CONTAINS COORDINATOR CPUSET CPU_HARD_QUOTA_LIMIT CPU_SOFT_PRIORITY CREATEEXTTABLE @@ -1009,7 +1009,8 @@ static void check_expressions_in_partition_key(PartitionSpec *spec, core_yyscan_ %nonassoc COPY %nonassoc COST %nonassoc CPUSET - %nonassoc CPU_RATE_LIMIT + %nonassoc CPU_HARD_QUOTA_LIMIT + %nonassoc CPU_SOFT_PRIORITY %nonassoc CREATEEXTTABLE %nonassoc CSV %nonassoc CURRENT_P @@ -1093,8 +1094,6 @@ static void check_expressions_in_partition_key(PartitionSpec *spec, core_yyscan_ %nonassoc MATCH %nonassoc MAXVALUE %nonassoc MEMORY_LIMIT - %nonassoc MEMORY_SHARED_QUOTA - %nonassoc MEMORY_SPILL_RATIO %nonassoc MINUTE_P %nonassoc MINVALUE %nonassoc MISSING @@ 
-1727,26 +1726,18 @@ OptResourceGroupElem: /* was "concurrency" */ $$ = makeDefElem("concurrency", (Node *) makeInteger($2), @1); } - | CPU_RATE_LIMIT SignedIconst + | CPU_HARD_QUOTA_LIMIT SignedIconst { - $$ = makeDefElem("cpu_rate_limit", (Node *) makeInteger($2), @1); + $$ = makeDefElem("cpu_hard_quota_limit", (Node *) makeInteger($2), @1); } + | CPU_SOFT_PRIORITY SignedIconst + { + $$ = makeDefElem("cpu_soft_priority", (Node *) makeInteger($2), @1); + } | CPUSET Sconst { $$ = makeDefElem("cpuset", (Node *) makeString($2), @1); } - | MEMORY_SHARED_QUOTA SignedIconst - { - $$ = makeDefElem("memory_shared_quota", (Node *) makeInteger($2), @1); - } - | MEMORY_LIMIT SignedIconst - { - $$ = makeDefElem("memory_limit", (Node *) makeInteger($2), @1); - } - | MEMORY_SPILL_RATIO SignedIconst - { - $$ = makeDefElem("memory_spill_ratio", (Node *) makeInteger($2), @1); - } ; /***************************************************************************** @@ -18991,7 +18982,8 @@ unreserved_keyword: | COPY | COST | CPUSET - | CPU_RATE_LIMIT + | CPU_HARD_QUOTA_LIMIT + | CPU_SOFT_PRIORITY | CREATEEXTTABLE | CSV | CUBE @@ -19110,8 +19102,6 @@ unreserved_keyword: | MATERIALIZED | MAXVALUE | MEMORY_LIMIT - | MEMORY_SHARED_QUOTA - | MEMORY_SPILL_RATIO | METHOD | MINUTE_P | MINVALUE @@ -19368,7 +19358,8 @@ PartitionIdentKeyword: ABORT_P | COPY | COST | CPUSET - | CPU_RATE_LIMIT + | CPU_HARD_QUOTA_LIMIT + | CPU_SOFT_PRIORITY | CREATEEXTTABLE | CSV | CUBE @@ -19456,8 +19447,6 @@ PartitionIdentKeyword: ABORT_P | MATCH | MAXVALUE | MEMORY_LIMIT - | MEMORY_SHARED_QUOTA - | MEMORY_SPILL_RATIO | MINVALUE | MISSING | MODE @@ -19903,7 +19892,8 @@ bare_label_keyword: | COPY | COST | CPUSET - | CPU_RATE_LIMIT + | CPU_HARD_QUOTA_LIMIT + | CPU_SOFT_PRIORITY | CREATEEXTTABLE | CROSS | CSV @@ -20068,8 +20058,6 @@ bare_label_keyword: | MAXVALUE | MEDIAN | MEMORY_LIMIT - | MEMORY_SHARED_QUOTA - | MEMORY_SPILL_RATIO | METHOD | MINVALUE | MISSING diff --git a/src/backend/utils/misc/guc_gp.c 
b/src/backend/utils/misc/guc_gp.c index 88aa9e57164..9ea46f041a3 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -226,10 +226,7 @@ bool gp_debug_resqueue_priority = false; /* Resource group GUCs */ int gp_resource_group_cpu_priority; double gp_resource_group_cpu_limit; -double gp_resource_group_memory_limit; bool gp_resource_group_bypass; -bool gp_resource_group_cpu_ceiling_enforcement; -bool gp_resource_group_enable_recalculate_query_mem; bool gp_resource_group_enable_cgroup_version_two; /* Metrics collector debug GUC */ @@ -1690,18 +1687,6 @@ struct config_bool ConfigureNamesBool_gp[] = NULL, NULL, NULL }, - { - - {"gp_log_resgroup_memory", PGC_USERSET, LOGGING_WHAT, - gettext_noop("Prints out messages related to resource group's memory management."), - NULL, - GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE - }, - &gp_log_resgroup_memory, - false, - NULL, NULL, NULL - }, - { {"gp_resqueue_print_operator_memory_limits", PGC_USERSET, LOGGING_WHAT, gettext_noop("Prints out the memory limit for operators (in explain) assigned by resource queue's " @@ -1714,18 +1699,6 @@ struct config_bool ConfigureNamesBool_gp[] = NULL, NULL, NULL }, - { - {"gp_resgroup_print_operator_memory_limits", PGC_USERSET, LOGGING_WHAT, - gettext_noop("Prints out the memory limit for operators (in explain) assigned by resource group's " - "memory management."), - NULL, - GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE - }, - &gp_resgroup_print_operator_memory_limits, - false, - NULL, NULL, NULL - }, - { {"gp_resgroup_debug_wait_queue", PGC_USERSET, DEVELOPER_OPTIONS, gettext_noop("Enable the debugging check on the wait queue of resource group."), @@ -2853,25 +2826,6 @@ struct config_bool ConfigureNamesBool_gp[] = check_gp_resource_group_bypass, NULL, NULL }, - { - {"gp_resource_group_cpu_ceiling_enforcement", PGC_POSTMASTER, RESOURCES, - gettext_noop("If the value is true, ceiling enforcement of CPU usage will be enabled"), - NULL - }, - 
&gp_resource_group_cpu_ceiling_enforcement, - false, NULL, NULL - }, - - { - {"gp_resource_group_enable_recalculate_query_mem", PGC_USERSET, RESOURCES, - gettext_noop("Enable resource group re-calculate the query_mem on QE"), - NULL - }, - &gp_resource_group_enable_recalculate_query_mem, - true, - NULL, NULL, NULL - }, - { {"gp_resource_group_enable_cgroup_version_two", PGC_POSTMASTER, RESOURCES, gettext_noop("Enable linux cgroup version 2"), @@ -3230,23 +3184,13 @@ struct config_int ConfigureNamesInt_gp[] = gpvars_check_statement_mem, NULL, NULL }, - { - {"memory_spill_ratio", PGC_USERSET, RESOURCES_MEM, - gettext_noop("Sets the memory_spill_ratio for resource group."), - NULL - }, - &memory_spill_ratio, - 20, 0, 100, - NULL, NULL, NULL - }, - { {"gp_resource_group_cpu_priority", PGC_POSTMASTER, RESOURCES, gettext_noop("Sets the cpu priority for postgres processes when resource group is enabled."), NULL }, &gp_resource_group_cpu_priority, - 1, 1, 50, + 10, 1, 50, NULL, NULL, NULL }, @@ -4104,17 +4048,6 @@ struct config_int ConfigureNamesInt_gp[] = NULL, NULL, NULL }, - { - {"gp_resgroup_memory_policy_auto_fixed_mem", PGC_USERSET, RESOURCES_MEM, - gettext_noop("Sets the fixed amount of memory reserved for non-memory intensive operators in the AUTO policy."), - NULL, - GUC_UNIT_KB | GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE - }, - &gp_resgroup_memory_policy_auto_fixed_mem, - 100, 50, INT_MAX, - NULL, NULL, NULL - }, - { {"gp_global_deadlock_detector_period", PGC_SIGHUP, LOCK_MANAGEMENT, gettext_noop("Sets the executing period of global deadlock detector backend."), @@ -4447,16 +4380,6 @@ struct config_real ConfigureNamesReal_gp[] = NULL, NULL, NULL }, - { - {"gp_resource_group_memory_limit", PGC_POSTMASTER, RESOURCES, - gettext_noop("Maximum percentage of memory resources assigned to a cluster."), - NULL - }, - &gp_resource_group_memory_limit, - 0.7, 0.0001, 1.0, - NULL, NULL, NULL - }, - { {"optimizer_damping_factor_filter", PGC_USERSET, QUERY_TUNING_METHOD, 
gettext_noop("select predicate damping factor in optimizer, 1.0 means no damping"), @@ -5011,15 +4934,6 @@ struct config_enum ConfigureNamesEnum_gp[] = NULL, NULL, NULL }, - { - {"gp_resgroup_memory_policy", PGC_SUSET, RESOURCES_MGM, - gettext_noop("Sets the policy for memory allocation of queries."), - gettext_noop("Valid values are AUTO, EAGER_FREE.") - }, - &gp_resgroup_memory_policy, - RESMANAGER_MEMORY_POLICY_EAGER_FREE, gp_resqueue_memory_policies, NULL, NULL - }, - { {"optimizer_join_order", PGC_USERSET, QUERY_TUNING_OTHER, gettext_noop("Set optimizer join heuristic model."), diff --git a/src/backend/utils/mmgr/redzone_handler.c b/src/backend/utils/mmgr/redzone_handler.c index c920d395695..03ab4fbfc67 100644 --- a/src/backend/utils/mmgr/redzone_handler.c +++ b/src/backend/utils/mmgr/redzone_handler.c @@ -145,12 +145,7 @@ RedZoneHandler_IsVmemRedZone() return false; if (vmemTrackerInited) - { - if (IsResGroupEnabled()) - return IsGroupInRedZone(); - else - return *segmentVmemChunks > redZoneChunks; - } + return *segmentVmemChunks > redZoneChunks; return false; } @@ -212,30 +207,6 @@ RedZoneHandler_FlagTopConsumer() SessionState *curSessionState = AllSessionStateEntries->usedList; - /* - * Find the group which used the most of global memory in resgroup mode. 
- */ - if (IsResGroupEnabled()) - { - int32 maxGlobalShareMem = 0; - int32 sessionGroupGSMem; - - while (curSessionState != NULL) - { - Assert(INVALID_SESSION_ID != curSessionState->sessionId); - - sessionGroupGSMem = SessionGetResGroupGlobalShareMemUsage(curSessionState); - - if (sessionGroupGSMem > maxGlobalShareMem) - { - maxGlobalShareMem = sessionGroupGSMem; - resGroupId = SessionGetResGroupId(curSessionState); - } - - curSessionState = curSessionState->next; - } - } - curSessionState = AllSessionStateEntries->usedList; while (curSessionState != NULL) diff --git a/src/backend/utils/mmgr/runaway_cleaner.c b/src/backend/utils/mmgr/runaway_cleaner.c index ff48eaad2fe..86aaa72c5f0 100644 --- a/src/backend/utils/mmgr/runaway_cleaner.c +++ b/src/backend/utils/mmgr/runaway_cleaner.c @@ -179,27 +179,16 @@ RunawayCleaner_StartCleanup() Assert(beginCleanupRunawayVersion < *latestRunawayVersion); Assert(endCleanupRunawayVersion < *latestRunawayVersion); - /* We don't want to cleanup multiple times for same runaway event */ + /* We don't want to clean up multiple times for same runaway event */ beginCleanupRunawayVersion = *latestRunawayVersion; if (RunawayCleaner_ShouldCancelQuery()) { SIMPLE_FAULT_INJECTOR("runaway_cleanup"); - if (IsResGroupEnabled()) - { - StringInfoData str; - initStringInfo(&str); - - LWLockAcquire(ResGroupLock, LW_SHARED); - ResGroupGetMemoryRunawayInfo(&str); - LWLockRelease(ResGroupLock); - ereport(ERROR, (errmsg("Canceling query because of high VMEM usage. %s", str.data))); - } - else - ereport(ERROR, (errmsg("Canceling query because of high VMEM usage. Used: %dMB, available %dMB, red zone: %dMB", - VmemTracker_ConvertVmemChunksToMB(MySessionState->sessionVmem), VmemTracker_GetAvailableVmemMB(), - RedZoneHandler_GetRedZoneLimitMB()), errprintstack(true))); + ereport(ERROR, (errmsg("Canceling query because of high VMEM usage. 
Used: %dMB, available %dMB, red zone: %dMB", + VmemTracker_ConvertVmemChunksToMB(MySessionState->sessionVmem), VmemTracker_GetAvailableVmemMB(), + RedZoneHandler_GetRedZoneLimitMB()), errprintstack(true))); } /* diff --git a/src/backend/utils/mmgr/vmem_tracker.c b/src/backend/utils/mmgr/vmem_tracker.c index 710ade8c69e..cbfafcafd50 100644 --- a/src/backend/utils/mmgr/vmem_tracker.c +++ b/src/backend/utils/mmgr/vmem_tracker.c @@ -58,7 +58,7 @@ static int32 startupChunks = 0; /* Vmem quota in chunk unit */ static int32 vmemChunksQuota = 0; /* - * Chunk size in bits. By default a chunk is 1MB, but it can be larger + * Chunk size in bits. By default, a chunk is 1MB, but it can be larger * depending on the vmem quota. */ static int chunkSizeInBits = BITS_IN_MB; @@ -215,12 +215,6 @@ VmemTracker_ReserveVmemChunks(int32 numChunksToReserve) bool waiverUsed = false; - if (!ResGroupReserveMemory(numChunksToReserve, waivedChunks, &waiverUsed)) - { - pg_atomic_sub_fetch_u32((pg_atomic_uint32 *)&MySessionState->sessionVmem, numChunksToReserve); - return MemoryFailure_ResourceGroupMemoryExhausted; - } - /* * Query vmem quota exhausted, so rollback the reservation and return error. 
* For non-QE processes and processes in critical section, we don't enforce @@ -235,8 +229,6 @@ VmemTracker_ReserveVmemChunks(int32 numChunksToReserve) { /* Revert the reserved space, but don't revert the prev_alloc as we have already set the firstTime to false */ pg_atomic_sub_fetch_u32((pg_atomic_uint32 *)&MySessionState->sessionVmem, numChunksToReserve); - /* Revert resgroup memory reservation */ - ResGroupReleaseMemory(numChunksToReserve); return MemoryFailure_QueryMemoryExhausted; } waiverUsed = true; @@ -260,8 +252,6 @@ VmemTracker_ReserveVmemChunks(int32 numChunksToReserve) pg_atomic_sub_fetch_u32((pg_atomic_uint32 *)&MySessionState->sessionVmem, numChunksToReserve); /* Revert vmem reservation */ pg_atomic_sub_fetch_u32((pg_atomic_uint32 *)segmentVmemChunks, numChunksToReserve); - /* Revert resgroup memory reservation */ - ResGroupReleaseMemory(numChunksToReserve); return MemoryFailure_VmemExhausted; } @@ -300,7 +290,6 @@ VmemTracker_ReleaseVmemChunks(int reduction) Assert(*segmentVmemChunks >= 0); Assert(NULL != MySessionState); pg_atomic_sub_fetch_u32((pg_atomic_uint32 *)&MySessionState->sessionVmem, reduction); - ResGroupReleaseMemory(reduction); Assert(0 <= MySessionState->sessionVmem); trackedVmemChunks -= reduction; } @@ -318,7 +307,7 @@ ReleaseAllVmemChunks() /* * Returns the available VMEM in "chunks" unit. If the available chunks - * is less than 0, it return 0. + * is less than 0, it returns 0. */ static int32 VmemTracker_GetNonNegativeAvailableVmemChunks() @@ -337,7 +326,7 @@ VmemTracker_GetNonNegativeAvailableVmemChunks() /* * Returns the available query chunks. If the available chunks - * is less than 0, it return 0. + * is less than 0, it returns 0. 
*/ static int32 VmemTracker_GetNonNegativeAvailableQueryChunks() @@ -354,21 +343,21 @@ VmemTracker_GetNonNegativeAvailableQueryChunks() } } -/* Converts chunks to MB */ +/* Convert chunks to MB */ int32 VmemTracker_ConvertVmemChunksToMB(int chunks) { return CHUNKS_TO_MB(chunks); } -/* Converts MB to chunks */ +/* Convert MB to chunks */ int32 VmemTracker_ConvertVmemMBToChunks(int mb) { return MB_TO_CHUNKS(mb); } -/* Converts chunks to bytes */ +/* Convert chunks to bytes */ int64 VmemTracker_ConvertVmemChunksToBytes(int chunks) { @@ -434,8 +423,7 @@ VmemTracker_GetVmemLimitChunks(void) * until resource group is activated, otherwise, there might * be an inconsistency about the vmem limit. */ - return IsResGroupEnabled() ? - ResGroupGetVmemLimitChunks() : vmemChunksQuota; + return vmemChunksQuota; } /* @@ -451,8 +439,7 @@ VmemTracker_GetChunkSizeInBits(void) * until resource group is activated, otherwise, there might * be an inconsistency about the chunk size. */ - return IsResGroupEnabled() ? - ResGroupGetVmemChunkSizeInBits() : chunkSizeInBits; + return chunkSizeInBits; } /* @@ -461,8 +448,7 @@ VmemTracker_GetChunkSizeInBits(void) static int32 VmemTracker_GetMaxChunksPerQuery(void) { - return IsResGroupEnabled() ? 
- ResGroupGetMaxChunksPerQuery() : maxChunksPerQuery; + return maxChunksPerQuery; } /* diff --git a/src/backend/utils/resgroup/cgroup-ops-linux-v1.c b/src/backend/utils/resgroup/cgroup-ops-linux-v1.c index 7d3b774dfd2..88fff256825 100644 --- a/src/backend/utils/resgroup/cgroup-ops-linux-v1.c +++ b/src/backend/utils/resgroup/cgroup-ops-linux-v1.c @@ -146,20 +146,6 @@ static const PermItem perm_items_cpuset[] = { CGROUP_COMPONENT_CPUSET, "cpuset.mems", R_OK | W_OK }, { CGROUP_COMPONENT_UNKNOWN, NULL, 0 } }; -static const PermItem perm_items_memory[] = -{ - { CGROUP_COMPONENT_MEMORY, "", R_OK | W_OK | X_OK }, - { CGROUP_COMPONENT_MEMORY, "memory.limit_in_bytes", R_OK | W_OK }, - { CGROUP_COMPONENT_MEMORY, "memory.usage_in_bytes", R_OK }, - { CGROUP_COMPONENT_UNKNOWN, NULL, 0 } -}; -static const PermItem perm_items_swap[] = -{ - { CGROUP_COMPONENT_MEMORY, "", R_OK | W_OK | X_OK }, - { CGROUP_COMPONENT_MEMORY, "memory.memsw.limit_in_bytes", R_OK | W_OK }, - { CGROUP_COMPONENT_MEMORY, "memory.memsw.usage_in_bytes", R_OK }, - { CGROUP_COMPONENT_UNKNOWN, NULL, 0 } -}; /* * just for cpuset check, same as the cpuset Permlist in permlists @@ -176,28 +162,6 @@ static const PermList cpusetPermList = */ static const PermList permlists[] = { - /* - * swap permissions are optional. - * - * cgroup/memory/memory.memsw.* is only available if - * - CONFIG_MEMCG_SWAP_ENABLED=on in kernel config, or - * - swapaccount=1 in kernel cmdline. - * - * Without these interfaces the swap usage can not be limited or accounted - * via cgroup. - */ - { perm_items_swap, true, &gp_resource_group_enable_cgroup_swap }, - - /* - * memory permissions can be mandatory or optional depends on the switch. - * - * resgroup memory auditor is introduced in 6.0 devel and backport - * to 5.x branch since 5.6.1. To provide backward compatibilities' memory - * permissions are optional on 5.x branch. 
- */ - { perm_items_memory, CGROUP_MEMORY_IS_OPTIONAL, - &gp_resource_group_enable_cgroup_memory }, - /* cpu/cpuacct permissions are mandatory */ { perm_items_cpu, false, NULL }, { perm_items_cpu_acct, false, NULL }, @@ -226,12 +190,8 @@ static void detachcgroup_v1(Oid group, CGroupComponentType component, int fd_dir static void destroycgroup_v1(Oid group, bool migrate); static int lockcgroup_v1(Oid group, CGroupComponentType component, bool block); static void unlockcgroup_v1(int fd); -static void setcpulimit_v1(Oid group, int cpu_rate_limit); -static void setmemorylimitbychunks_v1(Oid group, int32 memory_limit_chunks); -static void setmemorylimit_v1(Oid group, int memory_limit); +static void setcpulimit_v1(Oid group, int cpu_hard_limit); static int64 getcpuusage_v1(Oid group); -static int32 getmemoryusage_v1(Oid group); -static int32 getmemorylimitchunks_v1(Oid group); static void getcpuset_v1(Oid group, char *cpuset, int len); static void setcpuset_v1(Oid group, const char *cpuset); static float convertcpuusage_v1(int64 usage, int64 duration); @@ -807,8 +767,6 @@ createcgroup_v1(Oid group) if (!createDir(group, CGROUP_COMPONENT_CPU) || !createDir(group, CGROUP_COMPONENT_CPUACCT) || - (gp_resource_group_enable_cgroup_memory && - !createDir(group, CGROUP_COMPONENT_MEMORY)) || (gp_resource_group_enable_cgroup_cpuset && !createDir(group, CGROUP_COMPONENT_CPUSET))) { @@ -1079,9 +1037,7 @@ destroycgroup_v1(Oid group, bool migrate) if (!deleteDir(group, CGROUP_COMPONENT_CPU, "cpu.shares", migrate, detachcgroup_v1) || !deleteDir(group, CGROUP_COMPONENT_CPUACCT, NULL, migrate, detachcgroup_v1) || (gp_resource_group_enable_cgroup_cpuset && - !deleteDir(group, CGROUP_COMPONENT_CPUSET, NULL, migrate, detachcgroup_v1)) || - (gp_resource_group_enable_cgroup_memory && - !deleteDir(group, CGROUP_COMPONENT_MEMORY, "memory.limit_in_bytes", migrate, detachcgroup_v1))) + !deleteDir(group, CGROUP_COMPONENT_CPUSET, NULL, migrate, detachcgroup_v1))) { CGROUP_ERROR("can't remove cgroup 
for resource group '%d': %m", group); } @@ -1122,116 +1078,41 @@ unlockcgroup_v1(int fd) } /* - * Set the cpu rate limit for the OS group. + * Set the cpu hard limit for the OS group. * - * cpu_rate_limit should be within [0, 100]. + * cpu_hard_limit (the cpu_hard_quota_limit attribute) should be within [-1, 100]. */ static void -setcpulimit_v1(Oid group, int cpu_rate_limit) +setcpulimit_v1(Oid group, int cpu_hard_limit) { CGroupComponentType component = CGROUP_COMPONENT_CPU; - /* group.shares := gpdb.shares * cpu_rate_limit */ - - int64 shares = readInt64(CGROUP_ROOT_ID, BASEDIR_GPDB, component, - "cpu.shares"); - writeInt64(group, BASEDIR_GPDB, component, - "cpu.shares", shares * cpu_rate_limit / 100); - - /* set cpu.cfs_quota_us if hard CPU enforcement is enabled */ - if (gp_resource_group_cpu_ceiling_enforcement) + if (cpu_hard_limit > 0) { int64 periods = get_cfs_period_us_alpha(component); writeInt64(group, BASEDIR_GPDB, component, "cpu.cfs_quota_us", - periods * cgroupSystemInfoAlpha.ncores * cpu_rate_limit / 100); + periods * cgroupSystemInfoAlpha.ncores * cpu_hard_limit / 100); } else { - writeInt64(group, BASEDIR_GPDB, component, "cpu.cfs_quota_us", -1); + writeInt64(group, BASEDIR_GPDB, component, "cpu.cfs_quota_us", cpu_hard_limit); } } - /* - * Set the memory limit for the OS group by value. + * Set the cpu soft priority for the OS group. * - * memory_limit is the limit value in chunks - * - * If cgroup supports memory swap, we will write the same limit to - * memory.memsw.limit and memory.limit. + * For version 1, the default value of cpu.shares is 1024, corresponding to + * our cpu_soft_priority, whose default value is 100, so we need to adjust it. 
*/ static void -setmemorylimitbychunks_v1(Oid group, int32 memory_limit_chunks) +setcpupriority_v1(Oid group, int shares) { - CGroupComponentType component = CGROUP_COMPONENT_MEMORY; - int64 memory_limit_in_bytes; - - if (!gp_resource_group_enable_cgroup_memory) - return; - - memory_limit_in_bytes = VmemTracker_ConvertVmemChunksToBytes(memory_limit_chunks); - - /* Is swap interfaces enabled? */ - if (!gp_resource_group_enable_cgroup_swap) - { - /* No, then we only need to setup the memory limit */ - writeInt64(group, BASEDIR_GPDB, component, "memory.limit_in_bytes", - memory_limit_in_bytes); - } - else - { - /* Yes, then we have to setup both the memory and mem+swap limits */ - - int64 memory_limit_in_bytes_old; - - /* - * Memory limit should always <= mem+swap limit, then the limits - * must be set in a proper order depending on the relation between - * new and old limits. - */ - memory_limit_in_bytes_old = readInt64(group, BASEDIR_GPDB, component, - "memory.limit_in_bytes"); - - if (memory_limit_in_bytes > memory_limit_in_bytes_old) - { - /* When new value > old memory limit, write mem+swap limit first */ - writeInt64(group, BASEDIR_GPDB, component, - "memory.memsw.limit_in_bytes", memory_limit_in_bytes); - writeInt64(group, BASEDIR_GPDB, component, - "memory.limit_in_bytes", memory_limit_in_bytes); - } - else if (memory_limit_in_bytes < memory_limit_in_bytes_old) - { - /* When new value < old memory limit, write memory limit first */ - writeInt64(group, BASEDIR_GPDB, component, - "memory.limit_in_bytes", memory_limit_in_bytes); - writeInt64(group, BASEDIR_GPDB, component, - "memory.memsw.limit_in_bytes", memory_limit_in_bytes); - } - } -} - -/* - * Set the memory limit for the OS group by rate. - * - * memory_limit should be within [0, 100]. 
- */ -static void -setmemorylimit_v1(Oid group, int memory_limit) -{ - CGroupComponentType component = CGROUP_COMPONENT_MEMORY; - int fd; - int32 memory_limit_in_chunks; - - memory_limit_in_chunks = ResGroupGetVmemLimitChunks() * memory_limit / 100; - memory_limit_in_chunks *= ResGroupGetHostPrimaryCount(); - - fd = lockcgroup_v1(group, component, true); - setmemorylimitbychunks_v1(group, memory_limit_in_chunks); - unlockcgroup_v1(fd); + CGroupComponentType component = CGROUP_COMPONENT_CPU; + writeInt64(group, BASEDIR_GPDB, component, + "cpu.shares", (int64)(shares * 1024 / 100)); } - /* * Get the cpu usage of the OS group, that is the total cpu time obtained * by this OS group, in nano seconds. @@ -1244,133 +1125,6 @@ getcpuusage_v1(Oid group) return readInt64(group, BASEDIR_GPDB, component, "cpuacct.usage"); } -/* get cgroup ram and swap (in Byte) */ -static void -get_cgroup_memory_info(uint64 *cgram, uint64 *cgmemsw) -{ - CGroupComponentType component = CGROUP_COMPONENT_MEMORY; - - *cgram = readInt64(CGROUP_ROOT_ID, BASEDIR_PARENT, - component, "memory.limit_in_bytes"); - - if (gp_resource_group_enable_cgroup_swap) - { - *cgmemsw = readInt64(CGROUP_ROOT_ID, BASEDIR_PARENT, - component, "memory.memsw.limit_in_bytes"); - } - else - { - elog(DEBUG1, "swap memory is unlimited"); - *cgmemsw = (uint64) -1LL; - } -} - -/* get total ram and total swap (in Byte) from sysinfo */ -static void -get_memory_info(unsigned long *ram, unsigned long *swap) -{ - struct sysinfo info; - if (sysinfo(&info) < 0) - elog(ERROR, "can't get memory information: %m"); - *ram = info.totalram; - *swap = info.totalswap; -} - -/* get vm.overcommit_ratio */ -static int -getOvercommitRatio(void) -{ - int ratio; - char data[MAX_INT_STRING_LEN]; - size_t datasize = sizeof(data); - const char *path = "/proc/sys/vm/overcommit_ratio"; - - readData(path, data, datasize); - - if (sscanf(data, "%d", &ratio) != 1) - elog(ERROR, "invalid number '%s' in '%s'", data, path); - - return ratio; -} - -static int 
-gettotalmemory_v1(void) -{ - unsigned long ram, swap, total; - int overcommitRatio; - uint64 cgram, cgmemsw; - uint64 memsw; - uint64 outTotal; - - overcommitRatio = getOvercommitRatio(); - get_memory_info(&ram, &swap); - /* Get sysinfo total ram and swap size. */ - memsw = ram + swap; - outTotal = swap + ram * overcommitRatio / 100; - get_cgroup_memory_info(&cgram, &cgmemsw); - ram = Min(ram, cgram); - /* - * In the case that total ram and swap read from sysinfo is larger than - * from cgroup, ram and swap must both be limited, otherwise swap must - * not be limited(we can safely use the value from sysinfo as swap size). - */ - if (cgmemsw < memsw) - swap = cgmemsw - ram; - /* - * If it is in container, the total memory is limited by both the total - * memoery outside and the memsw of the container. - */ - total = Min(outTotal, swap + ram); - return total >> BITS_IN_MB; -} - -/* - * Get the memory usage of the OS group - * - * memory usage is returned in chunks - */ -static int32 -getmemoryusage_v1(Oid group) -{ - CGroupComponentType component = CGROUP_COMPONENT_MEMORY; - int64 memory_usage_in_bytes; - char *filename; - - /* Report 0 if cgroup memory is not enabled */ - if (!gp_resource_group_enable_cgroup_memory) - return 0; - - filename = gp_resource_group_enable_cgroup_swap - ? 
"memory.memsw.usage_in_bytes" - : "memory.usage_in_bytes"; - - memory_usage_in_bytes = readInt64(group, BASEDIR_GPDB, component, filename); - - return VmemTracker_ConvertVmemBytesToChunks(memory_usage_in_bytes); -} - -/* - * Get the memory limit of the OS group - * - * memory limit is returned in chunks - */ -static int32 -getmemorylimitchunks_v1(Oid group) -{ - CGroupComponentType component = CGROUP_COMPONENT_MEMORY; - int64 memory_limit_in_bytes; - - /* Report unlimited (max int32) if cgroup memory is not enabled */ - if (!gp_resource_group_enable_cgroup_memory) - return (int32) ((1U << 31) - 1); - - memory_limit_in_bytes = readInt64(group, BASEDIR_GPDB, - component, "memory.limit_in_bytes"); - - return VmemTracker_ConvertVmemBytesToChunks(memory_limit_in_bytes); -} - - /* * Get the cpuset of the OS group. * @param group: the destination group @@ -1478,15 +1232,10 @@ static CGroupOpsRoutine cGroupOpsRoutineAlpha = { .setcpulimit = setcpulimit_v1, .getcpuusage = getcpuusage_v1, + .setcpupriority = setcpupriority_v1, .getcpuset = getcpuset_v1, .setcpuset = setcpuset_v1, - .gettotalmemory = gettotalmemory_v1, - .getmemoryusage = getmemoryusage_v1, - .setmemorylimit = setmemorylimit_v1, - .getmemorylimitchunks = getmemorylimitchunks_v1, - .setmemorylimitbychunks = setmemorylimitbychunks_v1, - .convertcpuusage = convertcpuusage_v1, }; diff --git a/src/backend/utils/resgroup/cgroup.c b/src/backend/utils/resgroup/cgroup.c index d92f5ae4bfd..c0ce70ebbff 100644 --- a/src/backend/utils/resgroup/cgroup.c +++ b/src/backend/utils/resgroup/cgroup.c @@ -26,13 +26,13 @@ /* cgroup component names. */ const char *component_names[CGROUP_COMPONENT_COUNT] = { - "cpu", "cpuacct", "memory", "cpuset" + "cpu", "cpuacct", "cpuset" }; /* cgroup component dirs. 
*/ char component_dirs[CGROUP_COMPONENT_COUNT][MAX_CGROUP_PATHLEN] = { - FALLBACK_COMP_DIR, FALLBACK_COMP_DIR, FALLBACK_COMP_DIR, FALLBACK_COMP_DIR + FALLBACK_COMP_DIR, FALLBACK_COMP_DIR, FALLBACK_COMP_DIR }; diff --git a/src/backend/utils/resgroup/resgroup.c b/src/backend/utils/resgroup/resgroup.c index 572f7fb8da9..7994fa3064d 100644 --- a/src/backend/utils/resgroup/resgroup.c +++ b/src/backend/utils/resgroup/resgroup.c @@ -3,27 +3,6 @@ * resgroup.c * GPDB resource group management code. * - * - * TERMS: - * - * - FIXED QUOTA: the minimal memory quota reserved for a slot. This quota - * is promised to be available during the lifecycle of the slot. - * - * - SHARED QUOTA: the preemptive memory quota shared by all the slots - * in a resource group. When a slot want to use more memory than its - * FIXED QUOTA it can attempt to allocate from this SHARED QUOTA, however - * this allocation is possible to fail depending on the actual usage. - * - * - MEM POOL: the global memory quota pool shared by all the resource groups. - * Overuse in this pool is strictly forbidden. A resource group must - * acquire from this pool to have enough memory quota for its slots' - * FIXED QUOTA and SHARED QUOTA, and should release overused quota to - * this pool as soon as possible. - * - * - SLOT POOL: the global slot pool shared by all the resource groups. - * A resource group must acquire a free slot in this pool for a new - * transaction to run in it. - * * Portions Copyright (c) 2023, HashData Technology Limited. * Portions Copyright (c) 2006-2010, Greenplum inc. * Portions Copyright (c) 2012-Present VMware, Inc. or its affiliates. @@ -80,23 +59,10 @@ #define InvalidSlotId (-1) #define RESGROUP_MAX_SLOTS (MaxConnections) -/* - * A hard memory limit in by pass mode, in chunks - * More chunks are reserved on QD than on QE because planner and orca - * may need more memory to generate and optimize the plan. 
- */ -#define RESGROUP_BYPASS_MODE_MEMORY_LIMIT_ON_QD 30 -#define RESGROUP_BYPASS_MODE_MEMORY_LIMIT_ON_QE 10 - /* * GUC variables. */ -int gp_resgroup_memory_policy = RESMANAGER_MEMORY_POLICY_NONE; -bool gp_log_resgroup_memory = false; -int gp_resgroup_memory_policy_auto_fixed_mem; -bool gp_resgroup_print_operator_memory_limits = false; bool gp_resgroup_debug_wait_queue = true; -int memory_spill_ratio = 20; int gp_resource_group_queuing_timeout = 0; /* @@ -138,14 +104,6 @@ struct ResGroupProcData { Oid groupId; - int32 memUsage; /* memory usage of current proc */ - /* - * Record current bypass memory limit for each bypass queries. - * For bypass mode, memUsage of current process could accumulate in a session. - * So should limit the memory usage for each query instead of the whole session. - */ - int32 bypassMemoryLimit; - ResGroupData *group; ResGroupSlotData *slot; @@ -165,8 +123,6 @@ struct ResGroupSlotData Oid groupId; ResGroupData *group; /* pointer to the group */ - int32 memQuota; /* memory quota of current slot */ - int32 memUsage; /* total memory usage of procs belongs to this slot */ int nProcs; /* number of procs in this slot */ ResGroupSlotData *next; @@ -174,84 +130,27 @@ struct ResGroupSlotData ResGroupCaps caps; }; -/* - * Resource group operations for memory. - * - * Groups with different memory auditor will have different - * operations. - */ -typedef struct ResGroupMemOperations -{ - void (*group_mem_on_create) (Oid groupId, ResGroupData *group); - void (*group_mem_on_alter) (Oid groupId, ResGroupData *group); - void (*group_mem_on_drop) (Oid groupId, ResGroupData *group); - void (*group_mem_on_notify) (ResGroupData *group); - void (*group_mem_on_dump) (ResGroupData *group, StringInfo str); -} ResGroupMemOperations; - /* * Resource group information. 
*/ struct ResGroupData { - Oid groupId; /* Id for this group */ - - /* - * memGap is calculated as: - * (memory limit (before alter) - memory expected (after alter)) - * - * It stands for how many memory (in chunks) this group should - * give back to MEM POOL. - */ - int32 memGap; - - int32 memExpected; /* expected memory chunks according to current caps */ - int32 memQuotaGranted; /* memory chunks for quota part */ - int32 memSharedGranted; /* memory chunks for shared part */ - - volatile int32 memQuotaUsed; /* memory chunks assigned to all the running slots */ + Oid groupId; /* ID for this group */ - /* - * memory usage of this group, should always equal to the - * sum of session memory(session_state->sessionVmem) that - * belongs to this group - */ - volatile int32 memUsage; - volatile int32 memSharedUsage; + volatile int nRunning; /* number of running trans */ + volatile int nRunningBypassed; /* number of running trans in bypass mode */ + int totalExecuted; /* total number of executed trans */ + int totalQueued; /* total number of queued trans */ + int64 totalQueuedTimeMs; /* total queue time, in milliseconds */ + PROC_QUEUE waitProcs; /* list of PGPROC objects waiting on this group */ - volatile int nRunning; /* number of running trans */ - volatile int nRunningBypassed; /* number of running trans in bypass mode */ - int totalExecuted; /* total number of executed trans */ - int totalQueued; /* total number of queued trans */ - int64 totalQueuedTimeMs; /* total queue time, in milliseconds */ - PROC_QUEUE waitProcs; /* list of PGPROC objects waiting on this group */ + bool lockedForDrop; /* true if resource group is dropped but not committed yet */ - /* - * operation functions for resource group - */ - const ResGroupMemOperations *groupMemOps; - - bool lockedForDrop; /* true if resource group is dropped but not committed yet */ - - ResGroupCaps caps; /* capabilities of this group */ + ResGroupCaps caps; /* capabilities of this group */ }; struct ResGroupControl 
{ - int32 totalChunks; /* total memory chunks on this segment */ - /* - * Safe memory threshold: - * if remained global shared memory is less than this threshold, - * then the resource group memory usage is in red zone. - * Note that safeChunksThreshold100 is 100 times bigger than the real safe chunks. - * This is used to avoid rounding problem caused by runaway_detector_activation_percent - */ - pg_atomic_uint32 safeChunksThreshold100; - pg_atomic_uint32 freeChunks; /* memory chunks not allocated to any group, - will be used for the query which group share - memory is not enough*/ - - int32 chunkSizeInBits; int segmentsOnMaster; ResGroupSlotData *slots; /* slot pool shared by all resource groups */ @@ -269,8 +168,6 @@ struct ResGroupControl ResGroupData groups[1]; }; -bool gp_resource_group_enable_cgroup_memory = false; -bool gp_resource_group_enable_cgroup_swap = false; bool gp_resource_group_enable_cgroup_cpuset = false; CGroupOpsRoutine *cgroupOpsRoutine = NULL; @@ -301,24 +198,9 @@ static ResGroupSlotData bypassedSlot; /* static functions */ -static bool groupApplyMemCaps(ResGroupData *group); -static int32 mempoolReserve(Oid groupId, int32 chunks); -static void mempoolRelease(Oid groupId, int32 chunks); -static void groupRebalanceQuota(ResGroupData *group, - int32 chunks, - const ResGroupCaps *caps); -static void decideTotalChunks(int32 *totalChunks, int32 *chunkSizeInBits); -static int32 groupGetMemExpected(const ResGroupCaps *caps); -static int32 groupGetMemQuotaExpected(const ResGroupCaps *caps); -static int32 groupGetMemSharedExpected(const ResGroupCaps *caps); -static int32 groupGetMemSpillTotal(const ResGroupCaps *caps); -static int32 slotGetMemQuotaExpected(const ResGroupCaps *caps); -static int32 slotGetMemQuotaOnQE(const ResGroupCaps *caps, ResGroupData *group); -static int32 slotGetMemSpill(const ResGroupCaps *caps); + static void wakeupSlots(ResGroupData *group, bool grant); -static void notifyGroupsOnMem(Oid skipGroupId); -static int32 
mempoolAutoRelease(ResGroupData *group); -static int32 mempoolAutoReserve(ResGroupData *group, const ResGroupCaps *caps); + static ResGroupData *groupHashNew(Oid groupId); static ResGroupData *groupHashFind(Oid groupId, bool raise); static ResGroupData *groupHashRemove(Oid groupId); @@ -327,18 +209,8 @@ static ResGroupData *createGroup(Oid groupId, const ResGroupCaps *caps); static void removeGroup(Oid groupId); static void AtProcExit_ResGroup(int code, Datum arg); static void groupWaitCancel(bool isMoveQuery); -static int32 groupReserveMemQuota(ResGroupData *group); -static void groupReleaseMemQuota(ResGroupData *group, ResGroupSlotData *slot); -static int32 groupIncMemUsage(ResGroupData *group, - ResGroupSlotData *slot, - int32 chunks); -static int32 groupDecMemUsage(ResGroupData *group, - ResGroupSlotData *slot, - int32 chunks); -static int32 groupIncSlotMemUsage(ResGroupData *group, ResGroupSlotData *slot); -static void groupDecSlotMemUsage(ResGroupData *group, ResGroupSlotData *slot); -static void initSlot(ResGroupSlotData *slot, ResGroupData *group, - int32 slotMemQuota); + +static void initSlot(ResGroupSlotData *slot, ResGroupData *group); static void selfAttachResGroup(ResGroupData *group, ResGroupSlotData *slot); static void selfDetachResGroup(ResGroupData *group, ResGroupSlotData *slot); static bool slotpoolInit(void); @@ -353,8 +225,6 @@ static void groupDecBypassedRef(ResGroupData *group); static ResGroupSlotData *groupAcquireSlot(ResGroupInfo *pGroupInfo, bool isMoveQuery); static void groupReleaseSlot(ResGroupData *group, ResGroupSlotData *slot, bool isMoveQuery); static void addTotalQueueDuration(ResGroupData *group); -static void groupSetMemorySpillRatio(const ResGroupCaps *caps); -static char *groupDumpMemUsage(ResGroupData *group); static void selfValidateResGroupInfo(void); static bool selfIsAssigned(void); static void selfSetGroup(ResGroupData *group); @@ -385,19 +255,6 @@ static void sessionSetSlot(ResGroupSlotData *slot); static void 
sessionResetSlot(void); static ResGroupSlotData *sessionGetSlot(void); -static void bindGroupOperation(ResGroupData *group); -static void groupMemOnAlterForVmtracker(Oid groupId, ResGroupData *group); -static void groupMemOnDropForVmtracker(Oid groupId, ResGroupData *group); -static void groupMemOnNotifyForVmtracker(ResGroupData *group); -static void groupMemOnDumpForVmtracker(ResGroupData *group, StringInfo str); - -static void groupMemOnAlterForCgroup(Oid groupId, ResGroupData *group); -static void groupMemOnDropForCgroup(Oid groupId, ResGroupData *group); -static void groupMemOnNotifyForCgroup(ResGroupData *group); -static void groupMemOnDumpForCgroup(ResGroupData *group, StringInfo str); -static void groupApplyCgroupMemInc(ResGroupData *group); -static void groupApplyCgroupMemDec(ResGroupData *group); - static void cpusetOperation(char *cpuset1, const char *cpuset2, int len, @@ -413,28 +270,6 @@ static bool groupIsNotDropped(const ResGroupData *group); static bool groupWaitQueueFind(ResGroupData *group, const PGPROC *proc); #endif /* USE_ASSERT_CHECKING */ -/* - * Operations of memory for resource groups with vmtracker memory auditor. - */ -static const ResGroupMemOperations resgroup_memory_operations_vmtracker = { - .group_mem_on_create = NULL, - .group_mem_on_alter = groupMemOnAlterForVmtracker, - .group_mem_on_drop = groupMemOnDropForVmtracker, - .group_mem_on_notify = groupMemOnNotifyForVmtracker, - .group_mem_on_dump = groupMemOnDumpForVmtracker, -}; - -/* - * Operations of memory for resource groups with cgroup memory auditor. - */ -static const ResGroupMemOperations resgroup_memory_operations_cgroup = { - .group_mem_on_create = NULL, - .group_mem_on_alter = groupMemOnAlterForCgroup, - .group_mem_on_drop = groupMemOnDropForCgroup, - .group_mem_on_notify = groupMemOnNotifyForCgroup, - .group_mem_on_dump = groupMemOnDumpForCgroup, -}; - /* * Estimate size the resource group structures will need in * shared memory. 
@@ -508,10 +343,6 @@ ResGroupControlInit(void) */ pResGroupControl->loaded = false; pResGroupControl->nGroups = MaxResourceGroups; - pResGroupControl->totalChunks = 0; - pg_atomic_init_u32(&pResGroupControl->safeChunksThreshold100, 0); - pg_atomic_init_u32(&pResGroupControl->freeChunks, 0); - pResGroupControl->chunkSizeInBits = BITS_IN_MB; for (i = 0; i < MaxResourceGroups; i++) pResGroupControl->groups[i].groupId = InvalidOid; @@ -527,6 +358,43 @@ ResGroupControlInit(void) errmsg("not enough shared memory for resource group control"))); } + +/* + * Initialize the global CGroupOpsRoutine struct of resource groups. + */ +void +CGroupOpsAndInfoInit(void) +{ + bool found; + int size; + + size = sizeof(CGroupOpsRoutine); + cgroupOpsRoutine = (CGroupOpsRoutine *) + ShmemInitStruct("global cgroup operations routine", + size, &found); + if (found) + return; + if (cgroupOpsRoutine == NULL) + goto error_out; + + size = sizeof(CGroupSystemInfo); + cgroupSystemInfo = (CGroupSystemInfo *) + ShmemInitStruct("global cgroup system info", + size, &found); + + if (found) + return; + if (cgroupSystemInfo == NULL) + goto error_out; + + return; + +error_out: + ereport(FATAL, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("not enough shared memory for cgroup operations routine"))); +} + /* * Allocate a resource group entry from a hash table */ @@ -614,17 +482,6 @@ InitResGroups(void) if (pResGroupControl->loaded) goto exit; - /* These initialization must be done before createGroup() */ - decideTotalChunks(&pResGroupControl->totalChunks, &pResGroupControl->chunkSizeInBits); - pg_atomic_write_u32(&pResGroupControl->freeChunks, pResGroupControl->totalChunks); - pg_atomic_write_u32(&pResGroupControl->safeChunksThreshold100, - pResGroupControl->totalChunks * (100 - runaway_detector_activation_percent)); - if (pResGroupControl->totalChunks == 0) - ereport(PANIC, - (errcode(ERRCODE_INSUFFICIENT_RESOURCES), - errmsg("insufficient memory available"), - errhint("Increase 
gp_resource_group_memory_limit"))); - if (gp_resource_group_enable_cgroup_cpuset) { /* Get cpuset from cpuset/gpdb, and transform it into bitset */ @@ -641,21 +498,20 @@ InitResGroups(void) { Oid groupId = ((Form_pg_resgroup) GETSTRUCT(tuple))->oid; ResGroupData *group; - int cpuRateLimit; + Bitmapset *bmsCurrent; GetResGroupCapabilities(relResGroupCapability, groupId, &caps); - cpuRateLimit = caps.cpuRateLimit; group = createGroup(groupId, &caps); Assert(group != NULL); cgroupOpsRoutine->createcgroup(groupId); - cgroupOpsRoutine->setmemorylimit(groupId, caps.memLimit); - - if (caps.cpuRateLimit != CPU_RATE_LIMIT_DISABLED) + + if (CpusetIsEmpty(caps.cpuset)) { - cgroupOpsRoutine->setcpulimit(groupId, caps.cpuRateLimit); + cgroupOpsRoutine->setcpulimit(groupId, caps.cpuHardQuotaLimit); + cgroupOpsRoutine->setcpupriority(groupId, caps.cpuSoftPriority); } else { @@ -678,7 +534,7 @@ InitResGroups(void) "please refer to the Cloudberry Documentations for details"))); } - Assert(caps.cpuRateLimit == CPU_RATE_LIMIT_DISABLED); + Assert(caps.cpuHardQuotaLimit == CPU_HARD_QUOTA_LIMIT_DISABLED); if (bms_is_empty(bmsMissing)) { @@ -823,11 +679,6 @@ ResGroupDropFinish(const ResourceGroupCallbackContext *callbackCtx, if (isCommit) { - bool migrate; - - /* Only migrate processes out of vmtracker groups */ - migrate = group->caps.memAuditor == RESGROUP_MEMORY_AUDITOR_VMTRACKER; - removeGroup(callbackCtx->groupid); if (!CpusetIsEmpty(group->caps.cpuset)) { @@ -842,7 +693,7 @@ ResGroupDropFinish(const ResourceGroupCallbackContext *callbackCtx, } } - cgroupOpsRoutine->destroycgroup(callbackCtx->groupid, migrate); + cgroupOpsRoutine->destroycgroup(callbackCtx->groupid, true); } } PG_CATCH(); @@ -934,7 +785,12 @@ ResGroupAlterOnCommit(const ResourceGroupCallbackContext *callbackCtx) if (callbackCtx->limittype == RESGROUP_LIMIT_TYPE_CPU) { cgroupOpsRoutine->setcpulimit(callbackCtx->groupid, - callbackCtx->caps.cpuRateLimit); + callbackCtx->caps.cpuHardQuotaLimit); + } + else if 
(callbackCtx->limittype == RESGROUP_LIMIT_TYPE_CPU_SHARES) + { + cgroupOpsRoutine->setcpupriority(callbackCtx->groupid, + callbackCtx->caps.cpuSoftPriority); } else if (callbackCtx->limittype == RESGROUP_LIMIT_TYPE_CPUSET) { @@ -945,18 +801,6 @@ ResGroupAlterOnCommit(const ResourceGroupCallbackContext *callbackCtx) cpuset); } } - else if (callbackCtx->limittype != RESGROUP_LIMIT_TYPE_MEMORY_SPILL_RATIO) - { - Assert(pResGroupControl->totalChunks > 0); - ResGroupCap memLimitGap = 0; - if (callbackCtx->limittype == RESGROUP_LIMIT_TYPE_MEMORY) - memLimitGap = callbackCtx->oldCaps.memLimit - callbackCtx->caps.memLimit; - group->memGap += pResGroupControl->totalChunks * memLimitGap / 100; - - Assert(group->groupMemOps != NULL); - if (group->groupMemOps->group_mem_on_alter) - group->groupMemOps->group_mem_on_alter(callbackCtx->groupid, group); - } /* reset default group if cpuset has changed */ if (strcmp(callbackCtx->oldCaps.cpuset, callbackCtx->caps.cpuset) && gp_resource_group_enable_cgroup_cpuset) @@ -1024,29 +868,6 @@ GetMyResGroupId(void) return self->groupId; } -int32 -ResGroupGetVmemLimitChunks(void) -{ - Assert(IsResGroupEnabled()); - - return pResGroupControl->totalChunks; -} - -int32 -ResGroupGetVmemChunkSizeInBits(void) -{ - Assert(IsResGroupEnabled()); - - return pResGroupControl->chunkSizeInBits; -} - -int32 -ResGroupGetMaxChunksPerQuery(void) -{ - return ceil(gp_vmem_limit_per_query / - (1024.0 * (1 << (pResGroupControl->chunkSizeInBits - BITS_IN_MB)))); -} - /* * Retrieve statistic information of type from resource group */ @@ -1090,9 +911,6 @@ ResGroupGetStat(Oid groupId, ResGroupStatType type) interval->month = 0; result = IntervalPGetDatum(interval); break; - case RES_GROUP_STAT_MEM_USAGE: - result = CStringGetDatum(groupDumpMemUsage(group)); - break; default: ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), @@ -1113,226 +931,6 @@ ResGroupGetHostPrimaryCount() return (Gp_role == GP_ROLE_EXECUTE ? 
host_primary_segment_count : pResGroupControl->segmentsOnMaster); } -static char * -groupDumpMemUsage(ResGroupData *group) -{ - StringInfoData memUsage; - - initStringInfo(&memUsage); - - Assert(group->groupMemOps != NULL); - if (group->groupMemOps->group_mem_on_dump) - group->groupMemOps->group_mem_on_dump(group, &memUsage); - - return memUsage.data; -} - -/* - * Dump memory information for current resource group. - */ -void -ResGroupDumpMemoryInfo(void) -{ - ResGroupSlotData *slot = self->slot; - ResGroupData *group = self->group; - - if (group) - { - Assert(selfIsAssigned()); - - write_log("Resource group memory information: " - "current group id is %u, " - "memLimit cap is %d, " - "memSharedQuota cap is %d, " - "memSpillRatio cap is %d, " - "group expected memory limit is %d MB, " - "memory quota granted in currenct group is %d MB, " - "shared quota granted in current group is %d MB, " - "memory assigned to all running slots is %d MB, " - "memory usage in current group is %d MB, " - "memory shared usage in current group is %d MB, " - "memory quota in current slot is %d MB, " - "memory usage in current slot is %d MB, " - "memory usage in current proc is %d MB", - group->groupId, - group->caps.memLimit, - group->caps.memSharedQuota, - group->caps.memSpillRatio, - VmemTracker_ConvertVmemChunksToMB(group->memExpected), - VmemTracker_ConvertVmemChunksToMB(group->memQuotaGranted), - VmemTracker_ConvertVmemChunksToMB(group->memSharedGranted), - VmemTracker_ConvertVmemChunksToMB(group->memQuotaUsed), - VmemTracker_ConvertVmemChunksToMB(group->memUsage), - VmemTracker_ConvertVmemChunksToMB(group->memSharedUsage), - VmemTracker_ConvertVmemChunksToMB(slot->memQuota), - VmemTracker_ConvertVmemChunksToMB(slot->memUsage), - VmemTracker_ConvertVmemChunksToMB(self->memUsage)); - } - else - { - Assert(!selfIsAssigned()); - - write_log("Resource group memory information: " - "memory usage in current proc is %d MB", - VmemTracker_ConvertVmemChunksToMB(self->memUsage)); - } -} - 
-/* - * Reserve 'memoryChunks' number of chunks for current resource group. - * It will first try to reserve memory from the resource group slot; if the slot - * quota exceeded, it will reserve memory from the shared zone. It fails if the - * shared quota is also exceeded, and no memory is reserved. - * - * 'overuseChunks' number of chunks can be overused for error handling, - * in such a case waiverUsed is marked as true. - */ -bool -ResGroupReserveMemory(int32 memoryChunks, int32 overuseChunks, bool *waiverUsed) -{ - int32 overuseMem; - ResGroupSlotData *slot = self->slot; - ResGroupData *group = self->group; - - /* - * Memories may be allocated before resource group is initialized, - * however,we need to track those memories once resource group is - * enabled, so we use IsResGroupEnabled() instead of - * IsResGroupActivated() here. - */ - if (!IsResGroupEnabled()) - return true; - - Assert(memoryChunks >= 0); - - /* - * Bypass the limit check when we are not in a valid resource group. - * But will update the memory usage of this proc, and it will be added up - * when this proc is assigned to a valid resource group. - */ - self->memUsage += memoryChunks; - if (bypassedGroup) - { - /* - * Do not allow to allocate more than the per proc limit. - */ - if (self->memUsage > self->bypassMemoryLimit) - { - self->memUsage -= memoryChunks; - return false; - } - - /* - * Set group & slot to bypassed ones so we could follow the limitation - * checking logic as normal transactions. 
- */ - group = bypassedGroup; - slot = &bypassedSlot; - } - else if (!selfIsAssigned()) - return true; - - Assert(bypassedGroup || slotIsInUse(slot)); - Assert(group->memUsage >= 0); - Assert(self->memUsage >= 0); - - /* add memoryChunks into group & slot memory usage */ - overuseMem = groupIncMemUsage(group, slot, memoryChunks); - - /* then check whether there is over usage */ - if (CritSectionCount == 0) - { - if (overuseMem > overuseChunks) - { - /* if the over usage is larger than allowed then revert the change */ - groupDecMemUsage(group, slot, memoryChunks); - - /* also revert in proc */ - self->memUsage -= memoryChunks; - Assert(self->memUsage >= 0); - - if (overuseChunks == 0) - ResGroupDumpMemoryInfo(); - - return false; - } - else if (overuseMem > 0) - { - /* the over usage is within the allowed threshold */ - *waiverUsed = true; - } - } - - return true; -} - -/* - * Release the memory of resource group - */ -void -ResGroupReleaseMemory(int32 memoryChunks) -{ - ResGroupSlotData *slot = self->slot; - ResGroupData *group = self->group; - - if (!IsResGroupEnabled()) - return; - - Assert(memoryChunks >= 0); - Assert(memoryChunks <= self->memUsage); - - self->memUsage -= memoryChunks; - if (bypassedGroup) - { - /* - * Set group & slot to bypassed ones so we could follow the release - * logic as normal transactions. - */ - group = bypassedGroup; - slot = &bypassedSlot; - } - else if (!selfIsAssigned()) - return; - - Assert(bypassedGroup || slotIsInUse(slot)); - - groupDecMemUsage(group, slot, memoryChunks); -} - -int64 -ResourceGroupGetQueryMemoryLimit(void) -{ - ResGroupSlotData *slot = self->slot; - int64 memSpill; - - Assert(Gp_role == GP_ROLE_DISPATCH); - - if (bypassedGroup) - { - int64 bytesInMB = 1 << BITS_IN_MB; - int64 bytesInChunk = (int64) 1 << VmemTracker_GetChunkSizeInBits(); - - /* - * In bypass mode there is a hard memory limit of - * RESGROUP_BYPASS_MODE_MEMORY_LIMIT_ON_QE chunk, - * we should make sure query_mem + misc mem <= chunk. 
- */ - return Min(bytesInMB, - bytesInChunk * RESGROUP_BYPASS_MODE_MEMORY_LIMIT_ON_QE / 2); - } - - Assert(selfIsAssigned()); - - if (IsResManagerMemoryPolicyNone()) - return 0; - - memSpill = slotGetMemSpill(&slot->caps); - /* memSpill is already converted to chunks */ - Assert(memSpill >= 0); - - return memSpill << VmemTracker_GetChunkSizeInBits(); -} - /* * removeGroup -- remove resource group from share memory and * reclaim the group's memory back to MEM POOL. @@ -1347,12 +945,7 @@ removeGroup(Oid groupId) group = groupHashRemove(groupId); - Assert(group->groupMemOps != NULL); - if (group->groupMemOps->group_mem_on_drop) - group->groupMemOps->group_mem_on_drop(groupId, group); - group->groupId = InvalidOid; - notifyGroupsOnMem(groupId); } /* @@ -1366,7 +959,6 @@ static ResGroupData * createGroup(Oid groupId, const ResGroupCaps *caps) { ResGroupData *group; - int32 chunks; Assert(LWLockHeldByMeInMode(ResGroupLock, LW_EXCLUSIVE)); Assert(OidIsValid(groupId)); @@ -1381,210 +973,12 @@ createGroup(Oid groupId, const ResGroupCaps *caps) ProcQueueInit(&group->waitProcs); group->totalExecuted = 0; group->totalQueued = 0; - group->memGap = 0; - group->memUsage = 0; - group->memSharedUsage = 0; - group->memQuotaUsed = 0; - group->groupMemOps = NULL; group->totalQueuedTimeMs = 0; group->lockedForDrop = false; - group->memQuotaGranted = 0; - group->memSharedGranted = 0; - group->memExpected = groupGetMemExpected(caps); - - chunks = mempoolReserve(groupId, group->memExpected); - groupRebalanceQuota(group, chunks, caps); - - bindGroupOperation(group); - return group; } -/* - * Bind operation to resource group according to memory auditor. 
- */ -static void -bindGroupOperation(ResGroupData *group) -{ - Assert(LWLockHeldByMeInMode(ResGroupLock, LW_EXCLUSIVE)); - - if (group->caps.memAuditor == RESGROUP_MEMORY_AUDITOR_VMTRACKER) - group->groupMemOps = &resgroup_memory_operations_vmtracker; - else if (group->caps.memAuditor == RESGROUP_MEMORY_AUDITOR_CGROUP) - group->groupMemOps = &resgroup_memory_operations_cgroup; - else - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("invalid memory auditor: %d", group->caps.memAuditor))); -} - -/* - * Add chunks into group and slot memory usage. - * - * Return the total over used chunks of global share - */ -static int32 -groupIncMemUsage(ResGroupData *group, ResGroupSlotData *slot, int32 chunks) -{ - int32 slotMemUsage; /* the memory current slot has been used */ - int32 sharedMemUsage; /* the total shared memory usage, - sum of group share and global share */ - int32 globalOveruse = 0; /* the total over used chunks of global share*/ - - /* Add the chunks to memUsage in slot */ - slotMemUsage = pg_atomic_add_fetch_u32((pg_atomic_uint32 *) &slot->memUsage, - chunks); - - /* Check whether shared memory should be added */ - sharedMemUsage = slotMemUsage - slot->memQuota; - if (sharedMemUsage > 0) - { - /* Decide how many chunks should be counted as shared memory */ - int32 deltaSharedMemUsage = Min(sharedMemUsage, chunks); - - /* Add these chunks to memSharedUsage in group, - * and record the old value*/ - int32 oldSharedUsage = pg_atomic_fetch_add_u32((pg_atomic_uint32 *) - &group->memSharedUsage, - deltaSharedMemUsage); - /* the free space of group share */ - int32 oldSharedFree = Max(0, group->memSharedGranted - oldSharedUsage); - - /* Calculate the global over used chunks */ - int32 deltaGlobalSharedMemUsage = Max(0, deltaSharedMemUsage - oldSharedFree); - - /* freeChunks -= deltaGlobalSharedMemUsage and get the new value */ - int32 newFreeChunks = pg_atomic_sub_fetch_u32(&pResGroupControl->freeChunks, - deltaGlobalSharedMemUsage); - /* 
calculate the total over used chunks of global share */ - globalOveruse = Max(0, 0 - newFreeChunks); - } - - /* Add the chunks to memUsage in group */ - pg_atomic_add_fetch_u32((pg_atomic_uint32 *) &group->memUsage, - chunks); - - return globalOveruse; -} - -/* - * Sub chunks from group ,slot memory usage and global shared memory. - * return memory chunks of global shared released this time - */ -static int32 -groupDecMemUsage(ResGroupData *group, ResGroupSlotData *slot, int32 chunks) -{ - int32 value; - int32 slotMemUsage; - int32 sharedMemUsage; - - /* Sub chunks from memUsage in group */ - value = pg_atomic_sub_fetch_u32((pg_atomic_uint32 *) &group->memUsage, - chunks); - Assert(value >= 0); - - /* Sub chunks from memUsage in slot */ - slotMemUsage = pg_atomic_fetch_sub_u32((pg_atomic_uint32 *) &slot->memUsage, - chunks); - Assert(slotMemUsage >= chunks); - - /* Check whether shared memory should be subed */ - sharedMemUsage = slotMemUsage - slot->memQuota; - if (sharedMemUsage > 0) - { - /* Decide how many chunks should be counted as shared memory */ - int32 deltaSharedMemUsage = Min(sharedMemUsage, chunks); - - /* Sub chunks from memSharedUsage in group */ - int32 oldSharedUsage = pg_atomic_fetch_sub_u32((pg_atomic_uint32 *) &group->memSharedUsage, - deltaSharedMemUsage); - - /* record the total global share usage of current group */ - int32 grpTotalGlobalUsage = Max(0, oldSharedUsage - group->memSharedGranted); - /* calculate the global share usage of current release */ - int32 deltaGlobalSharedMemUsage = Min(grpTotalGlobalUsage, deltaSharedMemUsage); - /* add chunks to global shared memory */ - pg_atomic_add_fetch_u32(&pResGroupControl->freeChunks, - deltaGlobalSharedMemUsage); - return deltaGlobalSharedMemUsage; - } - - return 0; -} - -/* - * Add the chunks of a slot in a group, it's used when move a query to a resource group - * - * Return the total over used chunks of global share - */ -static int32 -groupIncSlotMemUsage(ResGroupData *group, 
ResGroupSlotData *slot) -{ - int32 slotSharedMemUsage; /* the slot shared memory usage */ - int32 globalOveruse = 0; /* the total over used chunks of global share*/ - - /* Check whether shared memory should be added */ - slotSharedMemUsage = slot->memUsage - slot->memQuota; - if (slotSharedMemUsage > 0) - { - /* Add these chunks to memSharedUsage in group, - * and record the old value*/ - int32 oldSharedUsage = pg_atomic_fetch_add_u32((pg_atomic_uint32 *) - &group->memSharedUsage, - slotSharedMemUsage); - /* the free space of group share */ - int32 oldSharedFree = Max(0, group->memSharedGranted - oldSharedUsage); - - /* Calculate the global over used chunks */ - int32 deltaGlobalSharedMemUsage = Max(0, slotSharedMemUsage - oldSharedFree); - - /* freeChunks -= deltaGlobalSharedMemUsage and get the new value */ - int32 newFreeChunks = pg_atomic_sub_fetch_u32(&pResGroupControl->freeChunks, - deltaGlobalSharedMemUsage); - /* calculate the total over used chunks of global share */ - globalOveruse = Max(0, 0 - newFreeChunks); - } - - /* Add the chunks to memUsage in group */ - pg_atomic_add_fetch_u32((pg_atomic_uint32 *) &group->memUsage, slot->memUsage); - - return globalOveruse; -} - -/* - * Deduct the chunks of a slot in a group, it's used when move a query to a resource group - */ -static void -groupDecSlotMemUsage(ResGroupData *group, ResGroupSlotData *slot) -{ - int32 value; - int32 slotSharedMemUsage; - - /* Sub chunks from memUsage in group */ - value = pg_atomic_sub_fetch_u32((pg_atomic_uint32 *) &group->memUsage, - slot->memUsage); - Assert(value >= 0); - - /* Check whether shared memory should be subed */ - slotSharedMemUsage = slot->memUsage - slot->memQuota; - if (slotSharedMemUsage <= 0) - return; - - /* Sub chunks from memSharedUsage in group */ - int32 oldSharedUsage = pg_atomic_fetch_sub_u32((pg_atomic_uint32 *) &group->memSharedUsage, - slotSharedMemUsage); - Assert(oldSharedUsage >= slotSharedMemUsage); - - /* record the total global share usage of 
current group */ - int32 grpTotalGlobalUsage = Max(0, oldSharedUsage - group->memSharedGranted); - /* calculate the global share usage of current release */ - int32 deltaGlobalSharedMemUsage = Min(grpTotalGlobalUsage, slotSharedMemUsage); - /* add chunks to global shared memory */ - pg_atomic_add_fetch_u32(&pResGroupControl->freeChunks, - deltaGlobalSharedMemUsage); -} - /* * Attach a process (QD or QE) to a slot. */ @@ -1594,18 +988,15 @@ selfAttachResGroup(ResGroupData *group, ResGroupSlotData *slot) selfSetGroup(group); selfSetSlot(slot); - groupIncMemUsage(group, slot, self->memUsage); pg_atomic_add_fetch_u32((pg_atomic_uint32*) &slot->nProcs, 1); } - /* * Detach a process (QD or QE) from a slot. */ static void selfDetachResGroup(ResGroupData *group, ResGroupSlotData *slot) { - groupDecMemUsage(group, slot, self->memUsage); pg_atomic_sub_fetch_u32((pg_atomic_uint32*) &slot->nProcs, 1); selfUnsetSlot(); selfUnsetGroup(); @@ -1615,7 +1006,7 @@ selfDetachResGroup(ResGroupData *group, ResGroupSlotData *slot) * Initialize the members of a slot */ static void -initSlot(ResGroupSlotData *slot, ResGroupData *group, int32 slotMemQuota) +initSlot(ResGroupSlotData *slot, ResGroupData *group) { Assert(LWLockHeldByMeInMode(ResGroupLock, LW_EXCLUSIVE)); Assert(!slotIsInUse(slot)); @@ -1624,8 +1015,6 @@ initSlot(ResGroupSlotData *slot, ResGroupData *group, int32 slotMemQuota) slot->group = group; slot->groupId = group->groupId; slot->caps = group->caps; - slot->memQuota = slotMemQuota; - slot->memUsage = 0; } /* @@ -1657,8 +1046,6 @@ slotpoolInit(void) slot->group = NULL; slot->groupId = InvalidOid; - slot->memQuota = -1; - slot->memUsage = 0; slot->next = next; next = slot; @@ -1698,8 +1085,6 @@ slotpoolFreeSlot(ResGroupSlotData *slot) slot->group = NULL; slot->groupId = InvalidOid; - slot->memQuota = -1; - slot->memUsage = 0; slot->next = pResGroupControl->freeSlot; pResGroupControl->freeSlot = slot; @@ -1721,7 +1106,6 @@ groupGetSlot(ResGroupData *group) { ResGroupSlotData 
*slot; ResGroupCaps *caps; - int32 slotMemQuota; Assert(LWLockHeldByMeInMode(ResGroupLock, LW_EXCLUSIVE)); Assert(Gp_role == GP_ROLE_DISPATCH); @@ -1733,15 +1117,11 @@ groupGetSlot(ResGroupData *group) if (group->nRunning >= caps->concurrency) return NULL; - slotMemQuota = groupReserveMemQuota(group); - if (slotMemQuota < 0) - return NULL; - /* Now actually get a free slot */ slot = slotpoolAllocSlot(); Assert(!slotIsInUse(slot)); - initSlot(slot, group, slotMemQuota); + initSlot(slot, group); group->nRunning++; @@ -1757,77 +1137,12 @@ groupGetSlot(ResGroupData *group) static void groupPutSlot(ResGroupData *group, ResGroupSlotData *slot) { - int32 released; - Assert(LWLockHeldByMeInMode(ResGroupLock, LW_EXCLUSIVE)); - Assert(group->memQuotaUsed >= 0); Assert(slotIsInUse(slot)); - /* Return the memory quota granted to this slot */ - groupReleaseMemQuota(group, slot); - /* Return the slot back to free list */ slotpoolFreeSlot(slot); group->nRunning--; - - /* And finally release the overused memory quota */ - released = mempoolAutoRelease(group); - if (released > 0) - notifyGroupsOnMem(group->groupId); - - /* - * Once we have waken up other groups then the slot we just released - * might be reused, so we should not touch it anymore since now. - */ -} - -/* - * Reserve memory quota for a slot in group. - * - * If there is not enough free memory quota then return -1 and nothing - * is changed; otherwise return the reserved quota size. 
- */ -static int32 -groupReserveMemQuota(ResGroupData *group) -{ - ResGroupCaps *caps; - int32 slotMemQuota; - - Assert(LWLockHeldByMeInMode(ResGroupLock, LW_EXCLUSIVE)); - Assert(Gp_role == GP_ROLE_DISPATCH); - Assert(pResGroupControl->segmentsOnMaster > 0); - - caps = &group->caps; - mempoolAutoReserve(group, caps); - - /* Calculate the expected per slot quota */ - slotMemQuota = slotGetMemQuotaExpected(caps); - Assert(slotMemQuota >= 0); - - Assert(group->memQuotaUsed >= 0); - Assert(group->memQuotaUsed <= group->memQuotaGranted); - - if (group->memQuotaUsed + slotMemQuota > group->memQuotaGranted) - { - /* No enough memory quota available, give up */ - return -1; - } - - group->memQuotaUsed += slotMemQuota; - - return slotMemQuota; -} - -/* - * Release a slot's memory quota to group. - */ -static void -groupReleaseMemQuota(ResGroupData *group, ResGroupSlotData *slot) -{ - Assert(LWLockHeldByMeInMode(ResGroupLock, LW_EXCLUSIVE)); - - group->memQuotaUsed -= slot->memQuota; - Assert(group->memQuotaUsed >= 0); } /* @@ -1997,276 +1312,6 @@ groupAcquireSlot(ResGroupInfo *pGroupInfo, bool isMoveQuery) return slot; } -/* - * Wake up the backends in the wait queue when 'concurrency' is increased. - * This function is called in the callback function of ALTER RESOURCE GROUP. - * - * Return TRUE if any memory quota or shared quota is returned to MEM POOL. - */ -static bool -groupApplyMemCaps(ResGroupData *group) -{ - int32 reserved; - int32 released; - const ResGroupCaps *caps = &group->caps; - - Assert(LWLockHeldByMeInMode(ResGroupLock, LW_EXCLUSIVE)); - - group->memExpected = groupGetMemExpected(caps); - - released = mempoolAutoRelease(group); - Assert(released >= 0); - - /* - * suppose rg1 has memory_limit=10, memory_shared_quota=40, - * and session1 is running in rg1. 
- * - * now we alter rg1 memory_limit to 40 in another session, - * apparently both memory quota and shared quota are expected to increase, - * however as our design is to let them increase on new queries, - * then for session1 it won't see memory shared quota being increased - * until new queries being executed in rg1. - * - * so we should try to acquire the new quota immediately. - */ - reserved = mempoolAutoReserve(group, caps); - Assert(reserved >= 0); - - return released > reserved; -} - -/* - * Get quota from MEM POOL. - * - * chunks is the expected amount to get. - * - * return the actual got chunks, might be smaller than expectation. - */ -static int32 -mempoolReserve(Oid groupId, int32 chunks) -{ - int32 oldFreeChunks; - int32 newFreeChunks; - int32 reserved = 0; - - Assert(LWLockHeldByMeInMode(ResGroupLock, LW_EXCLUSIVE)); - - /* Compare And Save to avoid concurrency problem without using lock */ - while (true) - { - oldFreeChunks = pg_atomic_read_u32(&pResGroupControl->freeChunks); - reserved = Min(Max(0, oldFreeChunks), chunks); - newFreeChunks = oldFreeChunks - reserved; - if (reserved == 0) - break; - if (pg_atomic_compare_exchange_u32(&pResGroupControl->freeChunks, - (uint32 *) &oldFreeChunks, - (uint32) newFreeChunks)) - break; - } - - /* also update the safeChunksThreshold which is used in runaway detector */ - if (reserved != 0) - { - uint32 safeChunksThreshold100; - int safeChunksDelta100; - - safeChunksThreshold100 = (uint32) pg_atomic_read_u32(&pResGroupControl->safeChunksThreshold100); - safeChunksDelta100 = reserved * (100 - runaway_detector_activation_percent); - - if (safeChunksThreshold100 < safeChunksDelta100) - elog(ERROR, "safeChunksThreshold: %u should be positive after mempool reserved: %d", - safeChunksThreshold100, safeChunksDelta100); - - pg_atomic_sub_fetch_u32(&pResGroupControl->safeChunksThreshold100, safeChunksDelta100); - } - LOG_RESGROUP_DEBUG(LOG, "allocate %u out of %u chunks to group %d", - reserved, oldFreeChunks, 
groupId); - - Assert(newFreeChunks <= pResGroupControl->totalChunks); - - return reserved; -} - -/* - * Return chunks to MEM POOL. - */ -static void -mempoolRelease(Oid groupId, int32 chunks) -{ - int32 newFreeChunks; - - Assert(LWLockHeldByMeInMode(ResGroupLock, LW_EXCLUSIVE)); - Assert(chunks >= 0); - - newFreeChunks = pg_atomic_add_fetch_u32(&pResGroupControl->freeChunks, - chunks); - - /* also update the safeChunksThreshold which is used in runaway detector */ - pg_atomic_add_fetch_u32(&pResGroupControl->safeChunksThreshold100, - chunks * (100 - runaway_detector_activation_percent)); - - LOG_RESGROUP_DEBUG(LOG, "free %u to pool(%u) chunks from group %d", - chunks, newFreeChunks - chunks, groupId); - - Assert(newFreeChunks <= pResGroupControl->totalChunks); -} - -/* - * Assign the chunks we get from the MEM POOL to group and rebalance - * them into the 'quota' and 'shared' part of the group, the amount - * is calculated from caps. - */ -static void -groupRebalanceQuota(ResGroupData *group, int32 chunks, const ResGroupCaps *caps) -{ - int32 delta; - int32 memQuotaGranted = groupGetMemQuotaExpected(caps); - - Assert(LWLockHeldByMeInMode(ResGroupLock, LW_EXCLUSIVE)); - - delta = memQuotaGranted - group->memQuotaGranted; - if (delta >= 0) - { - delta = Min(chunks, delta); - - group->memQuotaGranted += delta; - chunks -= delta; - } - - group->memSharedGranted += chunks; -} - -/* - * Calculate the total memory chunks of the segment - */ -static void -decideTotalChunks(int32 *totalChunks, int32 *chunkSizeInBits) -{ - int32 nsegments; - int32 tmptotalChunks; - int32 tmpchunkSizeInBits; - - nsegments = Gp_role == GP_ROLE_EXECUTE ? host_primary_segment_count : pResGroupControl->segmentsOnMaster; - Assert(nsegments > 0); - - tmptotalChunks = cgroupOpsRoutine->gettotalmemory() * gp_resource_group_memory_limit / nsegments; - - /* - * If vmem is larger than 16GB (i.e., 16K MB), we make the chunks bigger - * so that the vmem limit in chunks unit is not larger than 16K. 
- */ - tmpchunkSizeInBits = BITS_IN_MB; - while(tmptotalChunks > (16 * 1024)) - { - tmpchunkSizeInBits++; - tmptotalChunks >>= 1; - } - - *totalChunks = tmptotalChunks; - *chunkSizeInBits = tmpchunkSizeInBits; -} - -/* - * Get total expected memory quota of a group in chunks - */ -static int32 -groupGetMemExpected(const ResGroupCaps *caps) -{ - Assert(pResGroupControl->totalChunks > 0); - return pResGroupControl->totalChunks * caps->memLimit / 100; -} - -/* - * Get per-group expected memory quota in chunks - */ -static int32 -groupGetMemQuotaExpected(const ResGroupCaps *caps) -{ - if (caps->concurrency > 0) - return slotGetMemQuotaExpected(caps) * caps->concurrency; - else - return groupGetMemExpected(caps) * - (100 - caps->memSharedQuota) / 100; -} - -/* - * Get per-group expected memory shared quota in chunks - */ -static int32 -groupGetMemSharedExpected(const ResGroupCaps *caps) -{ - return groupGetMemExpected(caps) - groupGetMemQuotaExpected(caps); -} - -/* - * Get per-group expected memory spill in chunks - */ -static int32 -groupGetMemSpillTotal(const ResGroupCaps *caps) -{ - if (memory_spill_ratio != RESGROUP_FALLBACK_MEMORY_SPILL_RATIO) - /* memSpill is in percentage mode */ - return groupGetMemExpected(caps) * memory_spill_ratio / 100; - else - /* memSpill is in fallback mode, return statement_mem instead */ - return VmemTracker_ConvertVmemMBToChunks(statement_mem >> 10); -} - -/* - * Get per-slot expected memory quota in chunks - */ -static int32 -slotGetMemQuotaExpected(const ResGroupCaps *caps) -{ - Assert(caps->concurrency != 0); - return groupGetMemExpected(caps) * - (100 - caps->memSharedQuota) / 100 / - caps->concurrency; -} - -/* - * Get per-slot expected memory quota in chunks on QE. - */ -static int32 -slotGetMemQuotaOnQE(const ResGroupCaps *caps, ResGroupData *group) -{ - int nFreeSlots = caps->concurrency - group->nRunning; - - /* - * On QE the runtime status must also be considered as it might have - * different caps with QD. 
- */ - if (nFreeSlots <= 0) - return Min(slotGetMemQuotaExpected(caps), - (group->memQuotaGranted - group->memQuotaUsed) / caps->concurrency); - else - return Min(slotGetMemQuotaExpected(caps), - (group->memQuotaGranted - group->memQuotaUsed) / nFreeSlots); -} - -/* - * Get per-slot expected memory spill in chunks - */ -static int32 -slotGetMemSpill(const ResGroupCaps *caps) -{ - if (memory_spill_ratio != RESGROUP_FALLBACK_MEMORY_SPILL_RATIO) - { - /* memSpill is in percentage mode */ - Assert(caps->concurrency != 0); - return groupGetMemSpillTotal(caps) / caps->concurrency; - } - else - { - /* - * memSpill is in fallback mode, it is an absolute value, no need to - * divide by concurrency. - */ - return groupGetMemSpillTotal(caps); - } -} - /* * Attempt to wake up pending slots in the group. * @@ -2298,164 +1343,13 @@ wakeupSlots(ResGroupData *group, bool grant) break; } - /* wake up one process in the wait queue */ - waitProc = groupWaitQueuePop(group); - - waitProc->resSlot = slot; - - procWakeup(waitProc); - } -} - -/* - * When a group returns chunks to MEM POOL, we need to: - * 1. For groups with vmtracker memory auditor, wake up the - * transactions waiting on them for memory quota. - * 2. For groups with cgroup memory auditor, increase their - * memory limit if needed. - */ -static void -notifyGroupsOnMem(Oid skipGroupId) -{ - int i; - - Assert(LWLockHeldByMeInMode(ResGroupLock, LW_EXCLUSIVE)); - - for (i = 0; i < MaxResourceGroups; i++) - { - ResGroupData *group = &pResGroupControl->groups[i]; - - if (group->groupId == InvalidOid) - continue; - - if (group->groupId == skipGroupId) - continue; - - Assert(group->groupMemOps != NULL); - if (group->groupMemOps->group_mem_on_notify) - group->groupMemOps->group_mem_on_notify(group); - - if (!pg_atomic_read_u32(&pResGroupControl->freeChunks)) - break; - } -} - -/* - * Release overused memory quota to MEM POOL. - * - * Both overused shared and non-shared memory quota will be released. 
- * - * If there was enough non-shared memory quota for free slots, - * then after this call there will still be enough non-shared memory quota. - * - * If this function is called after a slot is released, make sure that - * group->nRunning is updated before this function. - * - * Return the total released quota in chunks, can be 0. - * - * XXX: Some examples. - * - * Suppose concurrency is 10, running is 4, - * memory limit is 0.5, memory shared is 0.4 - * - * assume currentSharedUsage is 0 - * - * currentSharedStocks is 0.5*0.4 = 0.2 - * memQuotaGranted is 0.5*0.6 = 0.3 - * memStocksInuse is 0.5*0.4/10*6 = 0.12 - * memStocksFree is 0.3 - 0.12 = 0.18 - * - * * memLimit: 0.5 -> 0.4 - * for memQuotaGranted we could free 0.18 - 0.4*0.6/10*6 = 0.18-0.144 = 0.036 - * new memQuotaGranted is 0.3-0.036 = 0.264 - * new memStocksFree is 0.18-0.036 = 0.144 - * for memShared we could free currentSharedStocks - Max(currentSharedUsage, 0.4*0.4)=0.04 - * new currentSharedStocks is 0.2-0.04 = 0.16 - * - * * concurrency: 10 -> 20 - * for memQuotaGranted we could free 0.144 - 0.4*0.6/20*16 = 0.144 - 0.24*0.8 = -0.048 - * for memShared we could free currentSharedStocks - Max(currentSharedUsage, 0.4*0.4)=0.00 - * - * * memShared: 0.4 -> 0.2 - * for memQuotaGranted we could free 0.144 - 0.4*0.8/20*16 = 0.144 - 0.256 = -0.122 - * for memShared we could free currentSharedUsage - Max(currentSharedUsage, 0.4*0.2)=0.08 - * new currentSharedStocks is 0.16-0.08 = 0.08 - * - * * memShared: 0.2 -> 0.6 - * for memQuotaGranted we could free 0.144 - 0.4*0.4/20*16 = 0.144 - 0.128 = 0.016 - * new memQuotaGranted is 0.264 - 0.016 = 0.248 - * new memStocksFree is 0.144 - 0.016 = 0.128 - * for memShared we could free currentSharedUsage - Max(currentSharedUsage, 0.4*0.6) = -0.18 - * - * * memLimit: 0.4 -> 0.2 - * for memQuotaGranted we could free 0.128 - 0.2*0.4/20*16 = 0.128 - 0.064 = 0.064 - * new memQuotaGranted is 0.248-0.064 = 0.184 - * new memStocksFree is 0.128 - 0.064 = 0.064 - * for memShared 
we could free currentSharedStocks - Max(currentSharedUsage, 0.2*0.6) = -0.04 - */ -static int32 -mempoolAutoRelease(ResGroupData *group) -{ - int32 memQuotaNeeded; - int32 memQuotaToFree; - int32 memSharedNeeded; - int32 memSharedToFree; - int32 nfreeSlots; - ResGroupCaps *caps = &group->caps; - - Assert(LWLockHeldByMeInMode(ResGroupLock, LW_EXCLUSIVE)); - - /* nfreeSlots is the number of free slots */ - nfreeSlots = caps->concurrency - group->nRunning; - - /* the in use non-shared quota must be reserved */ - memQuotaNeeded = group->memQuotaUsed; - - /* also should reserve enough non-shared quota for free slots */ - memQuotaNeeded += - nfreeSlots > 0 ? slotGetMemQuotaExpected(caps) * nfreeSlots : 0; - - memQuotaToFree = group->memQuotaGranted - memQuotaNeeded; - if (memQuotaToFree > 0) - { - /* release the over used non-shared quota to MEM POOL */ - mempoolRelease(group->groupId, memQuotaToFree); - group->memQuotaGranted -= memQuotaToFree; - } - - memSharedNeeded = Max(group->memSharedUsage, - groupGetMemSharedExpected(caps)); - memSharedToFree = group->memSharedGranted - memSharedNeeded; - if (memSharedToFree > 0) - { - /* release the over used shared quota to MEM POOL */ - mempoolRelease(group->groupId, memSharedToFree); - group->memSharedGranted -= memSharedToFree; - } - - return Max(memQuotaToFree, 0) + Max(memSharedToFree, 0); -} + /* wake up one process in the wait queue */ + waitProc = groupWaitQueuePop(group); -/* - * Try to acquire enough quota & shared quota for current group from MEM POOL, - * the actual acquired quota depends on system loads. - * - * Return the reserved quota in chunks, can be 0. 
- */ -static int32 -mempoolAutoReserve(ResGroupData *group, const ResGroupCaps *caps) -{ - int32 currentMemStocks = group->memSharedGranted + group->memQuotaGranted; - int32 neededMemStocks = group->memExpected - currentMemStocks; - int32 chunks = 0; + waitProc->resSlot = slot; - if (neededMemStocks > 0) - { - chunks = mempoolReserve(group->groupId, neededMemStocks); - groupRebalanceQuota(group, chunks, caps); + procWakeup(waitProc); } - - return chunks; } /* Update the total queued time of this group */ @@ -2518,15 +1412,9 @@ SerializeResGroupInfo(StringInfo str) itmp = htonl(caps->concurrency); appendBinaryStringInfo(str, (char *) &itmp, sizeof(int32)); - itmp = htonl(caps->cpuRateLimit); - appendBinaryStringInfo(str, (char *) &itmp, sizeof(int32)); - itmp = htonl(caps->memLimit); - appendBinaryStringInfo(str, (char *) &itmp, sizeof(int32)); - itmp = htonl(caps->memSharedQuota); + itmp = htonl(caps->cpuHardQuotaLimit); appendBinaryStringInfo(str, (char *) &itmp, sizeof(int32)); - itmp = htonl(caps->memSpillRatio); - appendBinaryStringInfo(str, (char *) &itmp, sizeof(int32)); - itmp = htonl(caps->memAuditor); + itmp = htonl(caps->cpuSoftPriority); appendBinaryStringInfo(str, (char *) &itmp, sizeof(int32)); cpuset_len = strlen(caps->cpuset); @@ -2561,15 +1449,9 @@ DeserializeResGroupInfo(struct ResGroupCaps *capsOut, memcpy(&itmp, ptr, sizeof(int32)); ptr += sizeof(int32); capsOut->concurrency = ntohl(itmp); memcpy(&itmp, ptr, sizeof(int32)); ptr += sizeof(int32); - capsOut->cpuRateLimit = ntohl(itmp); - memcpy(&itmp, ptr, sizeof(int32)); ptr += sizeof(int32); - capsOut->memLimit = ntohl(itmp); - memcpy(&itmp, ptr, sizeof(int32)); ptr += sizeof(int32); - capsOut->memSharedQuota = ntohl(itmp); - memcpy(&itmp, ptr, sizeof(int32)); ptr += sizeof(int32); - capsOut->memSpillRatio = ntohl(itmp); + capsOut->cpuHardQuotaLimit = ntohl(itmp); memcpy(&itmp, ptr, sizeof(int32)); ptr += sizeof(int32); - capsOut->memAuditor = ntohl(itmp); + capsOut->cpuSoftPriority = 
ntohl(itmp); memcpy(&itmp, ptr, sizeof(int32)); ptr += sizeof(int32); cpuset_len = ntohl(itmp); @@ -2663,20 +1545,11 @@ AssignResGroupOnMaster(void) /* Initialize the fake slot */ bypassedSlot.group = groupInfo.group; bypassedSlot.groupId = groupInfo.groupId; - bypassedSlot.memQuota = 0; - bypassedSlot.memUsage = 0; - - /* Attach self memory usage to resgroup */ - groupIncMemUsage(bypassedGroup, &bypassedSlot, self->memUsage); - - /* Record the bypass memory limit of current query */ - self->bypassMemoryLimit = self->memUsage + RESGROUP_BYPASS_MODE_MEMORY_LIMIT_ON_QD; /* Add into cgroup */ cgroupOpsRoutine->attachcgroup(bypassedGroup->groupId, MyProcPid, - bypassedGroup->caps.cpuRateLimit == CPU_RATE_LIMIT_DISABLED); + bypassedGroup->caps.cpuHardQuotaLimit == CPU_HARD_QUOTA_LIMIT_DISABLED); - groupSetMemorySpillRatio(&bypassedGroup->caps); return; } @@ -2692,7 +1565,6 @@ AssignResGroupOnMaster(void) /* Set resource group slot for current session */ sessionSetSlot(slot); - /* Add proc memory accounting info into group and slot */ selfAttachResGroup(groupInfo.group, slot); /* Init self */ @@ -2703,10 +1575,7 @@ AssignResGroupOnMaster(void) /* Add into cgroup */ cgroupOpsRoutine->attachcgroup(self->groupId, MyProcPid, - self->caps.cpuRateLimit == CPU_RATE_LIMIT_DISABLED); - - /* Set spill guc */ - groupSetMemorySpillRatio(&slot->caps); + self->caps.cpuHardQuotaLimit == CPU_HARD_QUOTA_LIMIT_DISABLED); } PG_CATCH(); { @@ -2731,9 +1600,6 @@ UnassignResGroup(bool releaseSlot) if (Gp_role == GP_ROLE_DISPATCH) groupDecBypassedRef(bypassedGroup); - /* Detach self memory usage from resgroup */ - groupDecMemUsage(bypassedGroup, &bypassedSlot, self->memUsage); - /* Reset the fake slot */ bypassedSlot.group = NULL; bypassedSlot.groupId = InvalidOid; @@ -2750,10 +1616,6 @@ UnassignResGroup(bool releaseSlot) if (!selfIsAssigned()) return; - /* Cleanup self */ - if (self->memUsage > 10) - LOG_RESGROUP_DEBUG(LOG, "idle proc memory usage: %d", self->memUsage); - 
LWLockAcquire(ResGroupLock, LW_EXCLUSIVE); /* Sub proc memory accounting info from group and slot */ @@ -2764,8 +1626,6 @@ UnassignResGroup(bool releaseSlot) { if (releaseSlot) { - /* release the memory left in the slot if there's entryDB */ - groupDecSlotMemUsage(group, slot); slot->nProcs = 0; } @@ -2821,15 +1681,6 @@ SwitchResGroupOnSegment(const char *buf, int len) Assert(bypassedGroup != NULL); - /* Initialize the fake slot */ - bypassedSlot.memQuota = 0; - bypassedSlot.memUsage = 0; - - /* Attach self memory usage to resgroup */ - groupIncMemUsage(bypassedGroup, &bypassedSlot, self->memUsage); - - /* Record the bypass memory limit of current query */ - self->bypassMemoryLimit = self->memUsage + RESGROUP_BYPASS_MODE_MEMORY_LIMIT_ON_QE; return; } @@ -2844,11 +1695,7 @@ SwitchResGroupOnSegment(const char *buf, int len) /* it's not the first dispatch in the same transaction */ Assert(self->groupId == newGroupId); Assert(self->caps.concurrency == caps.concurrency); - Assert(self->caps.cpuRateLimit == caps.cpuRateLimit); - Assert(self->caps.memLimit == caps.memLimit); - Assert(self->caps.memSharedQuota == caps.memSharedQuota); - Assert(self->caps.memSpillRatio == caps.memSpillRatio); - Assert(self->caps.memAuditor == caps.memAuditor); + Assert(self->caps.cpuHardQuotaLimit == caps.cpuHardQuotaLimit); Assert(!strcmp(self->caps.cpuset, caps.cpuset)); return; } @@ -2875,11 +1722,7 @@ SwitchResGroupOnSegment(const char *buf, int len) slot = slotpoolAllocSlot(); Assert(!slotIsInUse(slot)); sessionSetSlot(slot); - mempoolAutoReserve(group, &caps); - initSlot(slot, group, - slotGetMemQuotaOnQE(&caps, group)); - group->memQuotaUsed += slot->memQuota; - Assert(group->memQuotaUsed <= group->memQuotaGranted); + initSlot(slot, group); group->nRunning++; } @@ -2892,7 +1735,7 @@ SwitchResGroupOnSegment(const char *buf, int len) /* Add into cgroup */ cgroupOpsRoutine->attachcgroup(self->groupId, MyProcPid, - self->caps.cpuRateLimit == CPU_RATE_LIMIT_DISABLED); + 
self->caps.cpuHardQuotaLimit == CPU_HARD_QUOTA_LIMIT_DISABLED); } /* @@ -3196,30 +2039,6 @@ groupWaitCancel(bool isMoveQuery) groupAwaited = NULL; } -static void -groupSetMemorySpillRatio(const ResGroupCaps *caps) -{ - char value[64]; - - /* No need to set memory_spill_ratio if it is already up-to-date */ - if (caps->memSpillRatio == memory_spill_ratio) - return; - - snprintf(value, sizeof(value), "%d", caps->memSpillRatio); - set_config_option("memory_spill_ratio", value, PGC_USERSET, PGC_S_RESGROUP, - GUC_ACTION_SET, true, 0, false); -} - -void -ResGroupGetMemInfo(int *memLimit, int *slotQuota, int *sharedQuota) -{ - const ResGroupCaps *caps = &self->caps; - - *memLimit = groupGetMemExpected(caps); - *slotQuota = caps->concurrency ? slotGetMemQuotaExpected(caps) : -1; - *sharedQuota = groupGetMemSharedExpected(caps); -} - /* * Validate the consistency of the resgroup information in self. * @@ -3228,8 +2047,6 @@ ResGroupGetMemInfo(int *memLimit, int *slotQuota, int *sharedQuota) static void selfValidateResGroupInfo(void) { - Assert(self->memUsage >= 0); - AssertImply(self->groupId != InvalidOid, self->group != NULL); } @@ -3414,8 +2231,6 @@ slotValidate(const ResGroupSlotData *slot) if (slot->groupId == InvalidOid) { Assert(slot->nProcs == 0); - Assert(slot->memQuota < 0); - Assert(slot->memUsage == 0); } else { @@ -3818,9 +2633,6 @@ ResGroupDumpInfo(StringInfo str) /* dump fields in pResGroupControl. */ appendStringInfo(str, "\"segmentsOnMaster\":%d,", pResGroupControl->segmentsOnMaster); appendStringInfo(str, "\"loaded\":%s,", pResGroupControl->loaded ? 
"true" : "false"); - appendStringInfo(str, "\"totalChunks\":%d,", pResGroupControl->totalChunks); - appendStringInfo(str, "\"freeChunks\":%d,", pg_atomic_read_u32(&pResGroupControl->freeChunks)); - appendStringInfo(str, "\"chunkSizeInBits\":%d,", pResGroupControl->chunkSizeInBits); /* dump each group */ appendStringInfo(str, "\"groups\":["); @@ -3850,12 +2662,6 @@ resgroupDumpGroup(StringInfo str, ResGroupData *group) appendStringInfo(str, "\"nRunning\":%d,", group->nRunning); appendStringInfo(str, "\"nRunningBypassed\":%d,", group->nRunningBypassed); appendStringInfo(str, "\"locked_for_drop\":%d,", group->lockedForDrop); - appendStringInfo(str, "\"memExpected\":%d,", group->memExpected); - appendStringInfo(str, "\"memQuotaGranted\":%d,", group->memQuotaGranted); - appendStringInfo(str, "\"memSharedGranted\":%d,", group->memSharedGranted); - appendStringInfo(str, "\"memQuotaUsed\":%d,", group->memQuotaUsed); - appendStringInfo(str, "\"memUsage\":%d,", group->memUsage); - appendStringInfo(str, "\"memSharedUsage\":%d,", group->memSharedUsage); resgroupDumpWaitQueue(str, &group->waitProcs); resgroupDumpCaps(str, (ResGroupCap*)(&group->caps)); @@ -3928,8 +2734,6 @@ resgroupDumpSlots(StringInfo str) appendStringInfo(str, "{"); appendStringInfo(str, "\"slotId\":%d,", i); appendStringInfo(str, "\"groupId\":%u,", slot->groupId); - appendStringInfo(str, "\"memQuota\":%d,", slot->memQuota); - appendStringInfo(str, "\"memUsage\":%d,", slot->memUsage); appendStringInfo(str, "\"nProcs\":%d,", slot->nProcs); appendStringInfo(str, "\"next\":%d,", slotGetId(slot->next)); resgroupDumpCaps(str, (ResGroupCap*)(&slot->caps)); @@ -4007,235 +2811,6 @@ sessionGetSlot(void) return (ResGroupSlotData *) MySessionState->resGroupSlot; } -/* - * Operation for resource groups with vmtracker memory auditor - * when alter its memory limit. 
- */ -static void -groupMemOnAlterForVmtracker(Oid groupId, ResGroupData *group) -{ - bool shouldNotify; - - Assert(LWLockHeldByMeInMode(ResGroupLock, LW_EXCLUSIVE)); - - shouldNotify = groupApplyMemCaps(group); - - wakeupSlots(group, true); - if (shouldNotify) - notifyGroupsOnMem(groupId); -} - -/* - * Operation for resource groups with vmtracker memory auditor - * when reclaiming its memory back to MEM POOL. - */ -static void -groupMemOnDropForVmtracker(Oid groupId, ResGroupData *group) -{ - Assert(LWLockHeldByMeInMode(ResGroupLock, LW_EXCLUSIVE)); - - mempoolRelease(groupId, group->memQuotaGranted + group->memSharedGranted); - group->memQuotaGranted = 0; - group->memSharedGranted = 0; -} - -/* - * Operation for resource groups with vmtracker memory auditor - * when memory in MEM POOL is increased. - */ -static void -groupMemOnNotifyForVmtracker(ResGroupData *group) -{ - int32 delta; - - Assert(LWLockHeldByMeInMode(ResGroupLock, LW_EXCLUSIVE)); - - if (Gp_role != GP_ROLE_DISPATCH) - return; - - if (group->lockedForDrop) - return; - - if (groupWaitQueueIsEmpty(group)) - return; - - delta = group->memExpected - group->memQuotaGranted - group->memSharedGranted; - if (delta <= 0) - return; - - wakeupSlots(group, true); -} - -/* - * Operation for resource groups with vmtracker memory auditor - * when dump memory statistics. 
- */ -static void -groupMemOnDumpForVmtracker(ResGroupData *group, StringInfo str) -{ - appendStringInfo(str, "{"); - appendStringInfo(str, "\"used\":%d, ", - VmemTracker_ConvertVmemChunksToMB(group->memUsage)); - appendStringInfo(str, "\"available\":%d, ", - VmemTracker_ConvertVmemChunksToMB( - group->memQuotaGranted + group->memSharedGranted - group->memUsage)); - appendStringInfo(str, "\"quota_used\":%d, ", - VmemTracker_ConvertVmemChunksToMB(group->memQuotaUsed)); - appendStringInfo(str, "\"quota_available\":%d, ", - VmemTracker_ConvertVmemChunksToMB( - group->memQuotaGranted - group->memQuotaUsed)); - appendStringInfo(str, "\"quota_granted\":%d, ", - VmemTracker_ConvertVmemChunksToMB(group->memQuotaGranted)); - appendStringInfo(str, "\"quota_proposed\":%d, ", - VmemTracker_ConvertVmemChunksToMB( - groupGetMemQuotaExpected(&group->caps))); - appendStringInfo(str, "\"shared_used\":%d, ", - VmemTracker_ConvertVmemChunksToMB(group->memSharedUsage)); - appendStringInfo(str, "\"shared_available\":%d, ", - VmemTracker_ConvertVmemChunksToMB( - group->memSharedGranted - group->memSharedUsage)); - appendStringInfo(str, "\"shared_granted\":%d, ", - VmemTracker_ConvertVmemChunksToMB(group->memSharedGranted)); - appendStringInfo(str, "\"shared_proposed\":%d", - VmemTracker_ConvertVmemChunksToMB( - groupGetMemSharedExpected(&group->caps))); - appendStringInfo(str, "}"); -} - -/* - * Operation for resource groups with cgroup memory auditor - * when alter its memory limit. - */ -static void -groupMemOnAlterForCgroup(Oid groupId, ResGroupData *group) -{ - Assert(LWLockHeldByMeInMode(ResGroupLock, LW_EXCLUSIVE)); - - /* - * If memGap is positive, it indicates this group should - * give back these many memory back to MEM POOL. - * - * If memGap is negative, it indicates this group should - * retrieve these many memory from MEM POOL. - * - * If memGap is zero, this group is holding the same memory - * as it expects. 
- */ - if (group->memGap == 0) - return; - - if (group->memGap > 0) - groupApplyCgroupMemDec(group); - else - groupApplyCgroupMemInc(group); -} - -/* - * Increase a resource group's cgroup memory limit - * - * This may not take effect immediately. - */ -static void -groupApplyCgroupMemInc(ResGroupData *group) -{ - CGroupComponentType component = CGROUP_COMPONENT_MEMORY; - int32 memory_limit_chunks; - int32 memory_inc_chunks; - int fd; - - Assert(LWLockHeldByMeInMode(ResGroupLock, LW_EXCLUSIVE)); - Assert(group->memGap < 0); - - memory_inc_chunks = mempoolReserve(group->groupId, group->memGap * -1); - - if (memory_inc_chunks <= 0) - return; - - fd = cgroupOpsRoutine->lockcgroup(group->groupId, component, true); - memory_limit_chunks = cgroupOpsRoutine->getmemorylimitchunks(group->groupId); - cgroupOpsRoutine->setmemorylimitbychunks(group->groupId, memory_limit_chunks + memory_inc_chunks); - cgroupOpsRoutine->unlockcgroup(fd); - - group->memGap += memory_inc_chunks; -} - -/* - * Decrease a resource group's cgroup memory limit - * - * This will take effect immediately for now. - */ -static void -groupApplyCgroupMemDec(ResGroupData *group) -{ - CGroupComponentType component = CGROUP_COMPONENT_MEMORY; - int32 memory_limit; - int32 memory_dec; - int fd; - - Assert(LWLockHeldByMeInMode(ResGroupLock, LW_EXCLUSIVE)); - Assert(group->memGap > 0); - - fd = cgroupOpsRoutine->lockcgroup(group->groupId, component, true); - memory_limit = cgroupOpsRoutine->getmemorylimitchunks(group->groupId); - Assert(memory_limit > group->memGap); - - memory_dec = group->memGap; - - cgroupOpsRoutine->setmemorylimitbychunks(group->groupId, memory_limit - memory_dec); - cgroupOpsRoutine->unlockcgroup(fd); - - mempoolRelease(group->groupId, memory_dec); - notifyGroupsOnMem(group->groupId); - - group->memGap -= memory_dec; -} - -/* - * Operation for resource groups with cgroup memory auditor - * when reclaiming its memory back to MEM POOL. 
- */ -static void -groupMemOnDropForCgroup(Oid groupId, ResGroupData *group) -{ - int32 memory_expected; - - Assert(LWLockHeldByMeInMode(ResGroupLock, LW_EXCLUSIVE)); - - memory_expected = groupGetMemExpected(&group->caps); - - mempoolRelease(groupId, memory_expected + group->memGap); -} - -/* - * Operation for resource groups with cgroup memory auditor - * when memory in MEM POOL is increased. - */ -static void -groupMemOnNotifyForCgroup(ResGroupData *group) -{ - Assert(LWLockHeldByMeInMode(ResGroupLock, LW_EXCLUSIVE)); - - if (group->memGap < 0) - groupApplyCgroupMemInc(group); -} - -/* - * Operation for resource groups with cgroup memory auditor - * when dump memory statistics. - */ -static void -groupMemOnDumpForCgroup(ResGroupData *group, StringInfo str) -{ - appendStringInfo(str, "{"); - appendStringInfo(str, "\"used\":%d, ", - VmemTracker_ConvertVmemChunksToMB( - cgroupOpsRoutine->getmemoryusage(group->groupId) / ResGroupGetHostPrimaryCount())); - appendStringInfo(str, "\"limit_granted\":%d", - VmemTracker_ConvertVmemChunksToMB( - cgroupOpsRoutine->getmemorylimitchunks(group->groupId) / ResGroupGetHostPrimaryCount())); - appendStringInfo(str, "}"); -} - /* * Parse cpuset to bitset * If cpuset is "1,3-5", Bitmapset 1,3,4,5 are set. @@ -4529,90 +3104,6 @@ EnsureCpusetIsAvailable(int elevel) return true; } -/* - * Check whether current resource group's memory usage is in RedZone. - */ -bool -IsGroupInRedZone(void) -{ - uint32 remainGlobalSharedMem; - uint32 safeChunksThreshold100; - ResGroupSlotData *slot = self->slot; - ResGroupData *group = self->group; - - /* - * IsGroupInRedZone is called frequently, we should put the - * condition which returns with higher probability in front. 
- * - * safe: global shared memory is not in redzone - */ - remainGlobalSharedMem = (uint32) pg_atomic_read_u32(&pResGroupControl->freeChunks); - safeChunksThreshold100 = (uint32) pg_atomic_read_u32(&pResGroupControl->safeChunksThreshold100); - if (remainGlobalSharedMem * 100 >= safeChunksThreshold100) - return false; - - AssertImply(slot != NULL, group != NULL); - if (!slot) - return false; - - /* safe: slot memory is not used up */ - if (slot->memQuota > slot->memUsage) - return false; - - /* safe: group shared memory is not in redzone */ - if (group->memSharedGranted > group->memSharedUsage) - return false; - - /* memory usage in this group is in RedZone */ - return true; -} - - - -/* - * Dump memory information for current resource group. - * This is the output of resource group runaway. - */ -void -ResGroupGetMemoryRunawayInfo(StringInfo str) -{ - ResGroupSlotData *slot = self->slot; - ResGroupData *group = self->group; - uint32 remainGlobalSharedMem = 0; - uint32 safeChunksThreshold100 = 0; - - if (group) - { - Assert(selfIsAssigned()); - - remainGlobalSharedMem = (uint32) pg_atomic_read_u32(&pResGroupControl->freeChunks); - safeChunksThreshold100 = (uint32) pg_atomic_read_u32(&pResGroupControl->safeChunksThreshold100); - - appendStringInfo(str, - "current group id is %u, " - "group memory usage %d MB, " - "group shared memory quota is %d MB, " - "slot memory quota is %d MB, " - "global freechunks memory is %u MB, " - "global safe memory threshold is %u MB", - group->groupId, - VmemTracker_ConvertVmemChunksToMB(group->memUsage), - VmemTracker_ConvertVmemChunksToMB(group->memSharedGranted), - VmemTracker_ConvertVmemChunksToMB(slot->memQuota), - VmemTracker_ConvertVmemChunksToMB(remainGlobalSharedMem), - VmemTracker_ConvertVmemChunksToMB(safeChunksThreshold100 / 100)); - } - else - { - Assert(!selfIsAssigned()); - - appendStringInfo(str, - "Resource group memory information: " - "memory usage in current proc is %d MB", - 
VmemTracker_ConvertVmemChunksToMB(self->memUsage)); - } -} - /* * Return group id for a session */ @@ -4626,26 +3117,6 @@ SessionGetResGroupId(SessionState *session) return InvalidOid; } -/* - * Return group global share memory for a session - */ -int32 -SessionGetResGroupGlobalShareMemUsage(SessionState *session) -{ - ResGroupSlotData *sessionSlot = (ResGroupSlotData *)session->resGroupSlot; - if (sessionSlot) - { - /* lock not needed here, we just need esimated result */ - ResGroupData *group = sessionSlot->group; - return group->memSharedUsage - group->memSharedGranted; - } - else - { - /* session doesnot have group slot */ - return 0; - } -} - /* * move a proc to a resource group */ @@ -4673,7 +3144,6 @@ HandleMoveResourceGroup(void) { sessionSetSlot(slot); - /* Add proc memory accounting info into group and slot */ selfAttachResGroup(group, slot); /* Init self */ @@ -4681,7 +3151,7 @@ HandleMoveResourceGroup(void) /* Add into cgroup */ cgroupOpsRoutine->attachcgroup(self->groupId, MyProcPid, - self->caps.cpuRateLimit == CPU_RATE_LIMIT_DISABLED); + self->caps.cpuHardQuotaLimit == CPU_HARD_QUOTA_LIMIT_DISABLED); } PG_CATCH(); { @@ -4708,32 +3178,18 @@ HandleMoveResourceGroup(void) Assert(group != NULL); Assert(oldGroup != NULL); - /* - * move the slot memory to the new group, only do it once - * if there're more than once slice. 
- */ if (slot->groupId != groupId) { - /* deduct the slot memory from the old group */ - groupDecSlotMemUsage(oldGroup, slot); - oldGroup->memQuotaUsed -= slot->memQuota; oldGroup->nRunning--; /* reset the slot but don't touch the 'memUsage' */ slot->groupId = groupId; slot->group = group; slot->caps = group->caps; - slot->memQuota = slotGetMemQuotaOnQE(&group->caps, group); - /* add the slot memory to the new group */ - mempoolAutoReserve(group, &group->caps); - groupIncSlotMemUsage(group, slot); - group->memQuotaUsed += slot->memQuota; group->nRunning++; - Assert(group->memQuotaUsed <= group->memQuotaGranted); } - /* add the memory of entryDB to slot and group */ if (IS_QUERY_DISPATCHER()) selfAttachResGroup(group, slot); @@ -4748,63 +3204,8 @@ HandleMoveResourceGroup(void) /* Add into cgroup */ cgroupOpsRoutine->attachcgroup(self->groupId, MyProcPid, - self->caps.cpuRateLimit == CPU_RATE_LIMIT_DISABLED); - } -} - -static bool -hasEnoughMemory(int32 memUsed, int32 availMem) -{ - return memUsed < availMem; -} - -/* - * Check if there are enough memory to move the query to the destination group - */ -static void -moveQueryCheck(int sessionId, Oid groupId) -{ - char *cmd; - CdbPgResults cdb_pgresults = {NULL, 0}; - int32 sessionMem = ResGroupGetSessionMemUsage(sessionId); - int32 availMem = ResGroupGetGroupAvailableMem(groupId); - - if (sessionMem < 0) - elog(ERROR, "the process to move has ended"); - - if (!hasEnoughMemory(sessionMem, availMem)) - elog(ERROR, "group %d doesn't have enough memory on master, expect:%d, available:%d", groupId, sessionMem, availMem); - - cmd = psprintf("SELECT session_mem, available_mem from pg_resgroup_check_move_query(%d, %d)", sessionId, groupId); - - CdbDispatchCommand(cmd, DF_WITH_SNAPSHOT, &cdb_pgresults); - - for (int i = 0; i < cdb_pgresults.numResults; i++) - { - int i_session_mem; - int i_available_mem; - struct pg_result *pgresult = cdb_pgresults.pg_results[i]; - if (PQresultStatus(pgresult) != PGRES_TUPLES_OK) - { - 
cdbdisp_clearCdbPgResults(&cdb_pgresults); - elog(ERROR, "pg_resgroup_check_move_query: resultStatus not tuples_Ok: %s %s", - PQresStatus(PQresultStatus(pgresult)), PQresultErrorMessage(pgresult)); - } - - Assert(PQntuples(pgresult) == 1); - i_session_mem = PQfnumber(pgresult, "session_mem"); - i_available_mem = PQfnumber(pgresult, "available_mem"); - Assert(!PQgetisnull(pgresult, 0, i_session_mem)); - Assert(!PQgetisnull(pgresult, 0, i_available_mem)); - sessionMem = pg_atoi(PQgetvalue(pgresult, 0, i_session_mem), sizeof(int32), 0); - availMem = pg_atoi(PQgetvalue(pgresult, 0, i_available_mem), sizeof(int32), 0); - if (sessionMem <= 0) - continue; - if (!hasEnoughMemory(sessionMem, availMem)) - elog(ERROR, "group %d doesn't have enough memory on segment, expect:%d, available:%d", groupId, sessionMem, availMem); + self->caps.cpuHardQuotaLimit == CPU_HARD_QUOTA_LIMIT_DISABLED); } - - cdbdisp_clearCdbPgResults(&cdb_pgresults); } void @@ -4840,8 +3241,6 @@ ResGroupMoveQuery(int sessionId, Oid groupId, const char *groupName) PG_TRY(); { - moveQueryCheck(sessionId, groupId); - ResGroupSignalMoveQuery(sessionId, slot, groupId); cmd = psprintf("SELECT pg_resgroup_move_query(%d, %s)", @@ -4884,50 +3283,3 @@ ResGroupGetGroupIdBySessionId(int sessionId) return groupId; } - -/* - * get the memory usage of a session on one segment - */ -int32 -ResGroupGetSessionMemUsage(int sessionId) -{ - int32 memUsage = -1; - SessionState *curSessionState; - - LWLockAcquire(SessionStateLock, LW_SHARED); - curSessionState = AllSessionStateEntries->usedList; - while (curSessionState != NULL) - { - if (curSessionState->sessionId == sessionId) - { - ResGroupSlotData *slot = (ResGroupSlotData *)curSessionState->resGroupSlot; - memUsage = (slot == NULL) ? 
0 : slot->memUsage; - break; - } - curSessionState = curSessionState->next; - } - LWLockRelease(SessionStateLock); - - return memUsage; -} - -/* - * get the memory available in one resource group - */ -int32 -ResGroupGetGroupAvailableMem(Oid groupId) -{ - ResGroupData *group; - int availMem; - - LWLockAcquire(ResGroupLock, LW_SHARED); - group = groupHashFind(groupId, true); - Assert(group != NULL); - if (group->caps.memLimit == RESGROUP_UNLIMITED_MEMORY_LIMIT) - availMem = (uint32) pg_atomic_read_u32(&pResGroupControl->freeChunks); - else - availMem = slotGetMemQuotaExpected(&group->caps) + - group->memSharedGranted - group->memSharedUsage; - LWLockRelease(ResGroupLock); - return availMem; -} diff --git a/src/backend/utils/resgroup/resgroup_helper.c b/src/backend/utils/resgroup/resgroup_helper.c index e36cfc71bf3..3248ac53d77 100644 --- a/src/backend/utils/resgroup/resgroup_helper.c +++ b/src/backend/utils/resgroup/resgroup_helper.c @@ -32,7 +32,6 @@ typedef struct ResGroupStat Datum groupId; StringInfo cpuUsage; - StringInfo memUsage; } ResGroupStat; typedef struct ResGroupStatCtx @@ -104,7 +103,7 @@ getResUsage(ResGroupStatCtx *ctx, Oid inGroupId) initStringInfo(&buffer); appendStringInfo(&buffer, - "SELECT groupid, cpu_usage, memory_usage " + "SELECT groupid, cpu_usage " "FROM pg_resgroup_get_status(%u)", inGroupId); @@ -137,14 +136,8 @@ getResUsage(ResGroupStatCtx *ctx, Oid inGroupId) Assert(groupId == row->groupId); - if (row->memUsage->len == 0) + if (row->cpuUsage->len == 0) { - Datum d = ResGroupGetStat(groupId, RES_GROUP_STAT_MEM_USAGE); - - row->groupId = groupId; - appendStringInfo(row->memUsage, "{\"%d\":%s", - GpIdentity.segindex, DatumGetCString(d)); - appendStringInfo(row->cpuUsage, "{"); calcCpuUsage(row->cpuUsage, usages[j], timestamps[j], cgroupOpsRoutine->getcpuusage(groupId), @@ -154,14 +147,8 @@ getResUsage(ResGroupStatCtx *ctx, Oid inGroupId) result = PQgetvalue(pg_result, j, 1); appendStringInfo(row->cpuUsage, ", %s", result); - result = 
PQgetvalue(pg_result, j, 2); - appendStringInfo(row->memUsage, ", %s", result); - if (i == cdb_pgresults.numResults - 1) - { appendStringInfoChar(row->cpuUsage, '}'); - appendStringInfoChar(row->memUsage, '}'); - } } } @@ -175,10 +162,6 @@ getResUsage(ResGroupStatCtx *ctx, Oid inGroupId) { ResGroupStat *row = &ctx->groups[j]; Oid groupId = DatumGetObjectId(row->groupId); - Datum d = ResGroupGetStat(groupId, RES_GROUP_STAT_MEM_USAGE); - - appendStringInfo(row->memUsage, "\"%d\":%s", - GpIdentity.segindex, DatumGetCString(d)); calcCpuUsage(row->cpuUsage, usages[j], timestamps[j], cgroupOpsRoutine->getcpuusage(groupId), @@ -209,7 +192,7 @@ pg_resgroup_get_status(PG_FUNCTION_ARGS) { MemoryContext oldcontext; TupleDesc tupdesc; - int nattr = 8; + int nattr = 7; funcctx = SRF_FIRSTCALL_INIT(); @@ -223,7 +206,6 @@ pg_resgroup_get_status(PG_FUNCTION_ARGS) TupleDescInitEntry(tupdesc, (AttrNumber) 5, "num_executed", INT8OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 6, "total_queue_duration", INTERVALOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 7, "cpu_usage", JSONOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 8, "memory_usage", JSONOID, -1, 0); funcctx->tuple_desc = BlessTupleDesc(tupdesc); @@ -257,7 +239,6 @@ pg_resgroup_get_status(PG_FUNCTION_ARGS) { Assert(funcctx->max_calls < MaxResourceGroups); ctx->groups[funcctx->max_calls].cpuUsage = makeStringInfo(); - ctx->groups[funcctx->max_calls].memUsage = makeStringInfo(); ctx->groups[funcctx->max_calls++].groupId = oid; if (inGroupId != InvalidOid) @@ -317,7 +298,6 @@ pg_resgroup_get_status(PG_FUNCTION_ARGS) } values[6] = CStringGetTextDatum(row->cpuUsage->data); - values[7] = CStringGetTextDatum(row->memUsage->data); tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); @@ -462,30 +442,6 @@ dumpResGroupInfo(StringInfo str) } } -Datum -pg_resgroup_check_move_query(PG_FUNCTION_ARGS) -{ - TupleDesc tupdesc; - Datum values[2]; - bool nulls[2]; - HeapTuple htup; - int sessionId = 
PG_GETARG_INT32(0); - Oid groupId = PG_GETARG_OID(1); - int32 sessionMem = ResGroupGetSessionMemUsage(sessionId); - int32 availMem = ResGroupGetGroupAvailableMem(groupId); - - if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) - elog(ERROR, "return type must be a row type"); - tupdesc = BlessTupleDesc(tupdesc); - - MemSet(nulls, 0, sizeof(nulls)); - values[0] = Int32GetDatum(sessionMem); - values[1] = Int32GetDatum(availMem); - htup = heap_form_tuple(tupdesc, values, nulls); - - PG_RETURN_DATUM(HeapTupleGetDatum(htup)); -} - /* * move a query to a resource group */ diff --git a/src/backend/utils/resource_manager/memquota.c b/src/backend/utils/resource_manager/memquota.c index bc0b9c9869f..6856d06423e 100644 --- a/src/backend/utils/resource_manager/memquota.c +++ b/src/backend/utils/resource_manager/memquota.c @@ -53,7 +53,6 @@ typedef struct PolicyAutoContext /** * Forward declarations. */ -static void autoIncOpMemForResGroup(uint64 *opMemKB, int numOps); static bool PolicyAutoPrelimWalker(Node *node, PolicyAutoContext *context); static bool PolicyAutoAssignWalker(Node *node, PolicyAutoContext *context); static bool IsAggMemoryIntensive(Agg *agg); @@ -129,57 +128,6 @@ contain_ordered_aggs_walker(Node *node, void *context) return expression_tree_walker(node, contain_ordered_aggs_walker, context); } -/* - * Automatically increase operator memory buffer in resource group mode. - * - * In resource group if the operator memory buffer is too small for the - * operators we still allow the query to execute by temporarily increasing the - * buffer size, each operator will be assigned 100KB memory no matter it is - * memory intensive or not. The query can execute as long as there is enough - * resource group shared memory, the performance might not be best as 100KB is - * rather small for memory intensive operators. If there is no enought shared - * memory it will run into OOM error on operators. 
- * - * @param opMemKB the original operator memory buffer size, will be in-place - * updated if not large enough - * @param numOps the number of operators, both memory intensive and - * non-intensive - */ -static void -autoIncOpMemForResGroup(uint64 *opMemKB, int numOps) -{ - uint64 perOpMemKB; /* per-operator buffer size */ - uint64 minOpMemKB; /* minimal buffer size for all the operators */ - - /* Only adjust operator memory buffer for resource group */ - if (!IsResGroupEnabled()) - return; - - /* - * The buffer reserved for a memory intensive operator is the same as - * non-intensive ones, by default it is 100KB - */ - perOpMemKB = *gp_resmanager_memory_policy_auto_fixed_mem; - minOpMemKB = perOpMemKB * numOps; - - /* No need to change operator memory buffer if already large enough */ - if (*opMemKB >= minOpMemKB) - return; - - ereport(DEBUG2, - (errcode(ERRCODE_INSUFFICIENT_RESOURCES), - errmsg("No enough operator memory for current query."), - errdetail("Current query contains %d operators, " - "the minimal operator memory requirement is " INT64_FORMAT " KB, " - "however there is only " INT64_FORMAT " KB reserved. " - "Temporarily increased the operator memory to execute the query.", - numOps, minOpMemKB, *opMemKB), - errhint("Consider increase memory_spill_ratio for better performance."))); - - /* Adjust the buffer */ - *opMemKB = minOpMemKB; -} - /** * Is an agg operator memory intensive? The following cases mean it is: * 1. If agg strategy is hashed @@ -426,13 +374,6 @@ void PolicyAutoAssignOperatorMemoryKB(PlannedStmt *stmt, uint64 memAvailableByte Assert(!result); Assert(ctx.numMemIntensiveOperators + ctx.numNonMemIntensiveOperators > 0); - /* - * Make sure there is enough operator memory in resource group mode. 
- */ - autoIncOpMemForResGroup(&ctx.queryMemKB, - ctx.numNonMemIntensiveOperators + - ctx.numMemIntensiveOperators); - if (ctx.queryMemKB <= ctx.numNonMemIntensiveOperators * (*gp_resmanager_memory_policy_auto_fixed_mem)) { elog(ERROR, ERRMSG_GP_INSUFFICIENT_STATEMENT_MEMORY); @@ -926,15 +867,6 @@ PolicyEagerFreeAssignOperatorMemoryKB(PlannedStmt *stmt, uint64 memAvailableByte ctx.groupNode = NULL; ctx.nextGroupId = 0; - /* - * Make sure there is enough operator memory in resource group mode. - */ - autoIncOpMemForResGroup(&ctx.groupTree->groupMemKB, - Max(ctx.groupTree->numNonMemIntenseOps, - ctx.groupTree->maxNumConcNonMemIntenseOps) + - Max(ctx.groupTree->numMemIntenseOps, - ctx.groupTree->maxNumConcMemIntenseOps)); - /* * Check if memory exceeds the limit in the root group */ @@ -960,7 +892,6 @@ PolicyEagerFreeAssignOperatorMemoryKB(PlannedStmt *stmt, uint64 memAvailableByte int64 ResourceManagerGetQueryMemoryLimit(PlannedStmt* stmt) { - /* Returns QD's query_mem if we are on the QE, for re-calculating QE's query_mem */ if (Gp_role != GP_ROLE_DISPATCH && !IS_SINGLENODE()) return stmt->query_mem; @@ -973,8 +904,7 @@ ResourceManagerGetQueryMemoryLimit(PlannedStmt* stmt) if (IsResQueueEnabled()) return ResourceQueueGetQueryMemoryLimit(stmt, ActivePortal->queueId); - if (IsResGroupActivated()) - return ResourceGroupGetQueryMemoryLimit(); - return 0; + /* RG FIXME: should we return statement_mem every time? */ + return (uint64) statement_mem * 1024L; } diff --git a/src/backend/utils/resource_manager/resource_manager.c b/src/backend/utils/resource_manager/resource_manager.c index 5097375bfb6..1cda79d406f 100644 --- a/src/backend/utils/resource_manager/resource_manager.c +++ b/src/backend/utils/resource_manager/resource_manager.c @@ -48,6 +48,7 @@ ResManagerShmemInit(void) else if (IsResGroupEnabled() && !IsUnderPostmaster) { ResGroupControlInit(); + CGroupOpsAndInfoInit(); } } @@ -75,10 +76,6 @@ InitResManager(void) * checkpointer, ftsprobe and filerep processes. 
Wal sender acts like a backend, * so we also need to exclude it. */ - gp_resmanager_memory_policy = (ResManagerMemoryPolicy *) &gp_resgroup_memory_policy; - gp_log_resmanager_memory = &gp_log_resgroup_memory; - gp_resmanager_memory_policy_auto_fixed_mem = &gp_resgroup_memory_policy_auto_fixed_mem; - gp_resmanager_print_operator_memory_limits = &gp_resgroup_print_operator_memory_limits; InitResGroups(); diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c index a858ca4615f..911374e2796 100644 --- a/src/bin/pg_dump/pg_dumpall.c +++ b/src/bin/pg_dump/pg_dumpall.c @@ -797,74 +797,45 @@ dumpResGroups(PGconn *conn) PGresult *res; int i; int i_groupname, - i_cpu_rate_limit, + i_cpu_hard_quota_limit, i_concurrency, - i_memory_limit, - i_memory_shared_quota, - i_memory_spill_ratio, - i_memory_auditor, + i_cpu_soft_priority, i_cpuset; printfPQExpBuffer(buf, "SELECT g.rsgname AS groupname, " "t1.value AS concurrency, " - "t2.value AS cpu_rate_limit, " - "t3.value AS memory_limit, " - "t4.value AS memory_shared_quota, " - "t5.value AS memory_spill_ratio, " - "t6.value AS memory_auditor, " - "t7.value AS cpuset " + "t2.value AS cpu_hard_quota_limit, " + "t3.value AS cpu_soft_priority, " + "t4.value AS cpuset " "FROM pg_resgroup g " " JOIN pg_resgroupcapability t1 ON g.oid = t1.resgroupid AND t1.reslimittype = 1 " " JOIN pg_resgroupcapability t2 ON g.oid = t2.resgroupid AND t2.reslimittype = 2 " " JOIN pg_resgroupcapability t3 ON g.oid = t3.resgroupid AND t3.reslimittype = 3 " - " JOIN pg_resgroupcapability t4 ON g.oid = t4.resgroupid AND t4.reslimittype = 4 " - " JOIN pg_resgroupcapability t5 ON g.oid = t5.resgroupid AND t5.reslimittype = 5 " - "LEFT JOIN pg_resgroupcapability t6 ON g.oid = t6.resgroupid AND t6.reslimittype = 6 " - "LEFT JOIN pg_resgroupcapability t7 ON g.oid = t7.resgroupid AND t7.reslimittype = 7;"); + "LEFT JOIN pg_resgroupcapability t4 ON g.oid = t4.resgroupid AND t4.reslimittype = 4;"); res = executeQuery(conn, buf->data); i_groupname = 
PQfnumber(res, "groupname"); - i_cpu_rate_limit = PQfnumber(res, "cpu_rate_limit"); i_concurrency = PQfnumber(res, "concurrency"); - i_memory_limit = PQfnumber(res, "memory_limit"); - i_memory_shared_quota = PQfnumber(res, "memory_shared_quota"); - i_memory_spill_ratio = PQfnumber(res, "memory_spill_ratio"); - i_memory_auditor = PQfnumber(res, "memory_auditor"); + i_cpu_hard_quota_limit = PQfnumber(res, "cpu_hard_quota_limit"); + i_cpu_soft_priority = PQfnumber(res, "cpu_soft_priority"); i_cpuset = PQfnumber(res, "cpuset"); if (PQntuples(res) > 0) fprintf(OPF, "--\n-- Resource Group\n--\n\n"); - /* - * total cpu_rate_limit and memory_limit should less than 100, so clean - * them before we seting new memory_limit and cpu_rate_limit. - */ - fprintf(OPF, "ALTER RESOURCE GROUP \"admin_group\" SET cpu_rate_limit 1;\n"); - fprintf(OPF, "ALTER RESOURCE GROUP \"default_group\" SET cpu_rate_limit 1;\n"); - fprintf(OPF, "ALTER RESOURCE GROUP \"system_group\" SET cpu_rate_limit 1;\n"); - fprintf(OPF, "ALTER RESOURCE GROUP \"admin_group\" SET memory_limit 1;\n"); - fprintf(OPF, "ALTER RESOURCE GROUP \"default_group\" SET memory_limit 1;\n"); - fprintf(OPF, "ALTER RESOURCE GROUP \"system_group\" SET memory_limit 1;\n"); - for (i = 0; i < PQntuples(res); i++) { const char *groupname; - const char *cpu_rate_limit; const char *concurrency; - const char *memory_limit; - const char *memory_shared_quota; - const char *memory_spill_ratio; - const char *memory_auditor; + const char *cpu_hard_quota_limit; + const char *cpu_soft_priority; const char *cpuset; groupname = PQgetvalue(res, i, i_groupname); - cpu_rate_limit = PQgetvalue(res, i, i_cpu_rate_limit); concurrency = PQgetvalue(res, i, i_concurrency); - memory_limit = PQgetvalue(res, i, i_memory_limit); - memory_shared_quota = PQgetvalue(res, i, i_memory_shared_quota); - memory_spill_ratio = PQgetvalue(res, i, i_memory_spill_ratio); - memory_auditor = PQgetvalue(res, i, i_memory_auditor); + cpu_hard_quota_limit = PQgetvalue(res, i, 
i_cpu_hard_quota_limit); + cpu_soft_priority = PQgetvalue(res, i, i_cpu_soft_priority); cpuset = PQgetvalue(res, i, i_cpuset); resetPQExpBuffer(buf); @@ -876,62 +847,37 @@ dumpResGroups(PGconn *conn) * We can't emit CREATE statements for the built-in groups as they * will already exist in the target cluster. So emit ALTER * statements instead. - * - * Default resource groups must have memory_auditor == "vmtracker", - * no need to ALTER it, and we do not support ALTER memory_auditor - * at all. */ appendPQExpBuffer(buf, "ALTER RESOURCE GROUP %s SET concurrency %s;\n", fmtId(groupname), concurrency); - appendPQExpBuffer(buf, "ALTER RESOURCE GROUP %s SET memory_limit %s;\n", - fmtId(groupname), memory_limit); - appendPQExpBuffer(buf, "ALTER RESOURCE GROUP %s SET memory_shared_quota %s;\n", - fmtId(groupname), memory_shared_quota); - appendPQExpBuffer(buf, "ALTER RESOURCE GROUP %s SET memory_spill_ratio %s;\n", - fmtId(groupname), memory_spill_ratio); - if (atoi(cpu_rate_limit) >= 0) - appendPQExpBuffer(buf, "ALTER RESOURCE GROUP %s SET cpu_rate_limit %s;\n", - fmtId(groupname), cpu_rate_limit); + + if (atoi(cpu_hard_quota_limit) > 0) { + appendPQExpBuffer(buf, "ALTER RESOURCE GROUP %s SET cpu_hard_quota_limit %s;\n", + fmtId(groupname), cpu_hard_quota_limit); + appendPQExpBuffer(buf, "ALTER RESOURCE GROUP %s SET cpu_soft_priority %s;\n", + fmtId(groupname), cpu_soft_priority); + } else appendPQExpBuffer(buf, "ALTER RESOURCE GROUP %s SET cpuset '%s';\n", fmtId(groupname), cpuset); } else { - const char *memory_auditor_name; - const char *cpu_prop; - char cpu_setting[1024]; - - /* - * Possible values of memory_auditor: - * - "1": cgroup; - * - "0": vmtracker; - * - "": not set, e.g. created on an older version which does not - * support memory_auditor yet, consider it as vmtracker; - */ - if (strcmp(memory_auditor, "1") == 0) - memory_auditor_name = "cgroup"; - else - memory_auditor_name = "vmtracker"; + /* For other groups, we just create it directly. 
*/ - if (atoi(cpu_rate_limit) >= 0) + if (atoi(cpu_hard_quota_limit) > 0) { - cpu_prop = "cpu_rate_limit"; - snprintf(cpu_setting, sizeof(cpu_setting), "%s", cpu_rate_limit); + printfPQExpBuffer(buf, "CREATE RESOURCE GROUP %s WITH (" + "concurrency=%s, cpu_hard_quota_limit=%s, cpu_soft_priority=%s);\n", + fmtId(groupname), concurrency, cpu_hard_quota_limit, cpu_soft_priority); } else { - cpu_prop = "cpuset"; - snprintf(cpu_setting, sizeof(cpu_setting), "'%s'", cpuset); + printfPQExpBuffer(buf, "CREATE RESOURCE GROUP %s WITH (" + "concurrency=%s, cpu_set=%s);\n", + fmtId(groupname), concurrency, cpuset); } - printfPQExpBuffer(buf, "CREATE RESOURCE GROUP %s WITH (" - "concurrency=%s, %s=%s, " - "memory_limit=%s, memory_shared_quota=%s, " - "memory_spill_ratio=%s, memory_auditor=%s);\n", - fmtId(groupname), concurrency, cpu_prop, cpu_setting, - memory_limit, memory_shared_quota, - memory_spill_ratio, memory_auditor_name); } fprintf(OPF, "%s", buf->data); diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index 20a59a6ee6f..9a015481166 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -3096,7 +3096,7 @@ psql_completion(const char *text, int start, int end) else if (TailMatches("RESOURCE", "GROUP", MatchAny, "WITH", "(")) { static const char *const list_CREATERESOURCEGROUP[] = - {"CONCURRENCY", "CPU_RATE_LIMIT", "MEMORY_LIMIT", "MEMORY_REDZONE_LIMIT", NULL}; + {"CONCURRENCY", "cpu_hard_quota_limit", "MEMORY_LIMIT", "MEMORY_REDZONE_LIMIT", NULL}; COMPLETE_WITH_LIST(list_CREATERESOURCEGROUP); } diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index cd4616b8fbb..19110f7f0ad 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -11789,16 +11789,6 @@ { oid => 7154, descr => 'terminate a server process', proname => 'pg_terminate_backend', provolatile => 'v', prorettype => 'bool', proargtypes => 'int4 text', prosrc => 'pg_terminate_backend_msg' }, - -{ oid => 6053, 
descr => 'get the memory used by a session and the available memory of the destination group', - proname => 'pg_resgroup_check_move_query', prorows => '1000', proisstrict => 'f', - proretset => 't', provolatile => 'v', proparallel => 'r', prorettype => 'record', - proargtypes => 'int4 oid', - proallargtypes => 'int4,oid,int4,int4', - proargmodes => '{i,i,o,o}', - proargnames => '{session_id,group_id,session_mem,available_mem}', - prosrc => 'pg_resgroup_check_move_query' }, - { oid => 6064, descr => 'move query to a resource group', proname => 'pg_resgroup_move_query', provolatile => 'v', proparallel => 'r', prorettype => 'bool', @@ -11811,7 +11801,7 @@ { oid => 6066, descr => 'statistics: information about resource groups', proexeclocation => 'c', - proname => 'pg_resgroup_get_status', prorows => '1000', proisstrict => 'f', proretset => 't', provolatile => 'v', proparallel => 'r', prorettype => 'record', proargtypes => 'oid', proallargtypes => 'oid,oid,int4,int4,int8,int8,interval,json,json', proargmodes => '{i,o,o,o,o,o,o,o,o}', proargnames => '{groupid,groupid,num_running,num_queueing,num_queued,num_executed,total_queue_duration,cpu_usage,memory_usage}', prosrc => 'pg_resgroup_get_status' }, + proname => 'pg_resgroup_get_status', prorows => '1000', proisstrict => 'f', proretset => 't', provolatile => 'v', proparallel => 'r', prorettype => 'record', proargtypes => 'oid', proallargtypes => 'oid,oid,int4,int4,int8,int8,interval,json', proargmodes => '{i,o,o,o,o,o,o,o}', proargnames => '{groupid,groupid,num_running,num_queueing,num_queued,num_executed,total_queue_duration,cpu_usage}', prosrc => 'pg_resgroup_get_status' }, { oid => 6036, descr => 'waiting relation information', proname => 'gp_dist_wait_status', prorows => '1000', proisstrict => 'f', diff --git a/src/include/catalog/pg_resgroup.h b/src/include/catalog/pg_resgroup.h index 2a5bba2adf5..3625762112e 100644 --- a/src/include/catalog/pg_resgroup.h +++ b/src/include/catalog/pg_resgroup.h @@ -51,13 +51,10 @@ 
typedef enum ResGroupLimitType { RESGROUP_LIMIT_TYPE_UNKNOWN = 0, - RESGROUP_LIMIT_TYPE_CONCURRENCY, - RESGROUP_LIMIT_TYPE_CPU, - RESGROUP_LIMIT_TYPE_MEMORY, - RESGROUP_LIMIT_TYPE_MEMORY_SHARED_QUOTA, - RESGROUP_LIMIT_TYPE_MEMORY_SPILL_RATIO, - RESGROUP_LIMIT_TYPE_MEMORY_AUDITOR, - RESGROUP_LIMIT_TYPE_CPUSET, + RESGROUP_LIMIT_TYPE_CONCURRENCY, /* concurrency */ + RESGROUP_LIMIT_TYPE_CPU, /* cpu_hard_quota_limit */ + RESGROUP_LIMIT_TYPE_CPU_SHARES, /* cpu_soft_priority */ + RESGROUP_LIMIT_TYPE_CPUSET, /* cpuset */ RESGROUP_LIMIT_TYPE_COUNT, } ResGroupLimitType; diff --git a/src/include/catalog/pg_resgroupcapability.dat b/src/include/catalog/pg_resgroupcapability.dat index 5f8f554427b..514ad35801c 100644 --- a/src/include/catalog/pg_resgroupcapability.dat +++ b/src/include/catalog/pg_resgroupcapability.dat @@ -14,24 +14,15 @@ { resgroupid => '6437', reslimittype => '1', value => '20' }, { resgroupid => '6437', reslimittype => '2', value => '20' }, -{ resgroupid => '6437', reslimittype => '3', value => '0' }, -{ resgroupid => '6437', reslimittype => '4', value => '80' }, -{ resgroupid => '6437', reslimittype => '5', value => '0' }, -{ resgroupid => '6437', reslimittype => '6', value => '0' }, -{ resgroupid => '6437', reslimittype => '7', value => '-1' }, +{ resgroupid => '6437', reslimittype => '3', value => '100' }, +{ resgroupid => '6437', reslimittype => '4', value => '-1' }, { resgroupid => '6438', reslimittype => '1', value => '10' }, { resgroupid => '6438', reslimittype => '2', value => '10' }, -{ resgroupid => '6438', reslimittype => '3', value => '10' }, -{ resgroupid => '6438', reslimittype => '4', value => '80' }, -{ resgroupid => '6438', reslimittype => '5', value => '0' }, -{ resgroupid => '6438', reslimittype => '6', value => '0' }, -{ resgroupid => '6438', reslimittype => '7', value => '-1' }, +{ resgroupid => '6438', reslimittype => '3', value => '100' }, +{ resgroupid => '6438', reslimittype => '4', value => '-1' }, { resgroupid => '6448', reslimittype 
=> '1', value => '0' }, { resgroupid => '6448', reslimittype => '2', value => '10' }, -{ resgroupid => '6448', reslimittype => '3', value => '0' }, -{ resgroupid => '6448', reslimittype => '4', value => '0' }, -{ resgroupid => '6448', reslimittype => '5', value => '0' }, -{ resgroupid => '6448', reslimittype => '6', value => '0' }, -{ resgroupid => '6448', reslimittype => '7', value => '-1' }, +{ resgroupid => '6448', reslimittype => '3', value => '100' }, +{ resgroupid => '6448', reslimittype => '4', value => '-1' }, ] diff --git a/src/include/cdb/cdbvars.h b/src/include/cdb/cdbvars.h index 88b605fd074..3b493217607 100644 --- a/src/include/cdb/cdbvars.h +++ b/src/include/cdb/cdbvars.h @@ -709,8 +709,6 @@ extern int gp_workfile_bytes_to_checksum; extern bool coredump_on_memerror; -/* Greenplum resource group query_mem re-calculate on QE */ -extern bool gp_resource_group_enable_recalculate_query_mem; /* Greenplum linux cgroup version, is enable version 2 */ extern bool gp_resource_group_enable_cgroup_version_two; diff --git a/src/include/commands/resgroupcmds.h b/src/include/commands/resgroupcmds.h index 0a75fab12d7..674f837c68f 100644 --- a/src/include/commands/resgroupcmds.h +++ b/src/include/commands/resgroupcmds.h @@ -19,14 +19,6 @@ #include "utils/resgroup.h" #include "utils/relcache.h" -typedef enum ResGroupMemAuditorType -{ - RESGROUP_MEMORY_AUDITOR_VMTRACKER = 0, - RESGROUP_MEMORY_AUDITOR_CGROUP, - - RESGROUP_MEMORY_AUDITOR_COUNT, -} ResGroupMemAuditorType; - extern void CreateResourceGroup(CreateResourceGroupStmt *stmt); extern void DropResourceGroup(DropResourceGroupStmt *stmt); extern void 
AlterResourceGroup(AlterResourceGroupStmt *stmt); diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index b679dbea1e6..fbac2ca02b9 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -141,9 +141,6 @@ typedef struct PlannedStmt /* What is the memory reserved for this query's execution? */ uint64 query_mem; - int32 total_memory_coordinator; /* GPDB: The total usable virtual memory on coordinator node in MB */ - int nsegments_coordinator; /* GPDB: The number of primary segments on coordinator node */ - /* * GPDB: Used to keep target information for CTAS and it is needed * to be dispatched to QEs. diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index 60d7e4d7cee..c18685486a4 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -107,7 +107,8 @@ PG_KEYWORD("conversion", CONVERSION_P, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("coordinator", COORDINATOR, UNRESERVED_KEYWORD, BARE_LABEL) /* GPDB */ PG_KEYWORD("copy", COPY, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("cost", COST, UNRESERVED_KEYWORD, BARE_LABEL) -PG_KEYWORD("cpu_rate_limit", CPU_RATE_LIMIT, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("cpu_hard_quota_limit", CPU_HARD_QUOTA_LIMIT, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("cpu_soft_priority", CPU_SOFT_PRIORITY, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("cpuset", CPUSET, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("create", CREATE, RESERVED_KEYWORD, AS_LABEL) PG_KEYWORD("createexttable", CREATEEXTTABLE, UNRESERVED_KEYWORD, BARE_LABEL) @@ -289,8 +290,6 @@ PG_KEYWORD("materialized", MATERIALIZED, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("maxvalue", MAXVALUE, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("median", MEDIAN, COL_NAME_KEYWORD, BARE_LABEL) PG_KEYWORD("memory_limit", MEMORY_LIMIT, UNRESERVED_KEYWORD, BARE_LABEL) -PG_KEYWORD("memory_shared_quota", MEMORY_SHARED_QUOTA, UNRESERVED_KEYWORD, BARE_LABEL) -PG_KEYWORD("memory_spill_ratio", MEMORY_SPILL_RATIO, 
UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("method", METHOD, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("minute", MINUTE_P, UNRESERVED_KEYWORD, AS_LABEL) PG_KEYWORD("minvalue", MINVALUE, UNRESERVED_KEYWORD, BARE_LABEL) diff --git a/src/include/utils/cgroup.h b/src/include/utils/cgroup.h index 27e5e9f369f..3702df3dabc 100644 --- a/src/include/utils/cgroup.h +++ b/src/include/utils/cgroup.h @@ -42,10 +42,12 @@ * can't be seen in gpdb. */ #define DEFAULT_CPUSET_GROUP_ID 1 + /* - * If cpu_rate_limit is set to this value, it means this feature is disabled + * If cpu_hard_quota_limit is set to this value, it means this feature is disabled. + * And meanwhile, it also means the process can use CPU resource infinitely. */ -#define CPU_RATE_LIMIT_DISABLED (-1) +#define CPU_HARD_QUOTA_LIMIT_DISABLED (-1) /* This is the default value about Linux Control Group */ #define DEFAULT_CPU_PERIOD_US 100000LL @@ -66,7 +68,6 @@ typedef enum */ CGROUP_COMPONENT_CPU = 0, CGROUP_COMPONENT_CPUACCT, - CGROUP_COMPONENT_MEMORY, CGROUP_COMPONENT_CPUSET, CGROUP_COMPONENT_COUNT, @@ -166,19 +167,13 @@ typedef int (*lockcgroup_function) (Oid group, CGroupComponentType component, bo typedef void (*unlockcgroup_function) (int fd); /* Set the cpu limit. */ -typedef void (*setcpulimit_function) (Oid group, int cpu_rate_limit); +typedef void (*setcpulimit_function) (Oid group, int cpu_hard_quota_limit); /* Set the cpu share. */ -typedef void (*setcpushare_function) (Oid group, int cpu_share); +typedef void (*setcpupriority_function) (Oid group, int cpu_soft_priority); /* Get the cpu usage of the OS group. 
*/ typedef int64 (*getcpuusage_function) (Oid group); -typedef int32 (*gettotalmemory_function) (void); -typedef int32 (*getmemoryusage_function) (Oid group); -typedef int32 (*getmemorylimitchunks_function) (Oid group); -typedef void (*setmemorylimit_function) (Oid group, int memory_limit); -typedef void (*setmemorylimitchunks_function) (Oid group, int32 chunks); - /* Get the cpuset configuration of a cgroup. */ typedef void (*getcpuset_function) (Oid group, char *cpuset, int len); @@ -211,16 +206,10 @@ typedef struct CGroupOpsRoutine setcpulimit_function setcpulimit; - setcpushare_function setcpushare; + setcpupriority_function setcpupriority; getcpuusage_function getcpuusage; - gettotalmemory_function gettotalmemory; - getmemoryusage_function getmemoryusage; - setmemorylimit_function setmemorylimit; - getmemorylimitchunks_function getmemorylimitchunks; - setmemorylimitchunks_function setmemorylimitbychunks; - getcpuset_function getcpuset; setcpuset_function setcpuset; diff --git a/src/include/utils/linux-ops.h b/src/include/utils/linux-ops.h new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/include/utils/resgroup.h b/src/include/utils/resgroup.h index 81a25d1ffa4..216fc29a6a1 100644 --- a/src/include/utils/resgroup.h +++ b/src/include/utils/resgroup.h @@ -37,18 +37,9 @@ #define DefaultCpuset "-1" /* - * When setting memory_limit to 0 the group will has no reserved quota, all the - * memory need to be acquired from the global shared memory. + * Default value of cpu soft priority */ -#define RESGROUP_UNLIMITED_MEMORY_LIMIT (0) - -/* - * When setting memory_spill_ratio to 0 the statement_mem will be used to - * decide the operator memory, this is called the fallback mode, the benefit is - * statement_mem can be set in absolute values such as "128 MB" which is easier - * to understand. - */ -#define RESGROUP_FALLBACK_MEMORY_SPILL_RATIO (0) +#define DefaultCPUSoftPriority 100 /* * Resource group capability. 
@@ -76,11 +67,8 @@ typedef struct ResGroupCaps { ResGroupCap __unknown; /* placeholder, do not use it */ ResGroupCap concurrency; - ResGroupCap cpuRateLimit; - ResGroupCap memLimit; - ResGroupCap memSharedQuota; - ResGroupCap memSpillRatio; - ResGroupCap memAuditor; + ResGroupCap cpuHardQuotaLimit; + ResGroupCap cpuSoftPriority; char cpuset[MaxCpuSetLength]; } ResGroupCaps; @@ -93,24 +81,16 @@ typedef struct ResGroupCaps /* * GUC variables. */ -extern bool gp_log_resgroup_memory; -extern int gp_resgroup_memory_policy_auto_fixed_mem; -extern bool gp_resgroup_print_operator_memory_limits; extern bool gp_resgroup_debug_wait_queue; -extern int memory_spill_ratio; extern int gp_resource_group_cpu_priority; extern double gp_resource_group_cpu_limit; -extern bool gp_resource_group_cpu_ceiling_enforcement; -extern double gp_resource_group_memory_limit; extern bool gp_resource_group_bypass; extern int gp_resource_group_queuing_timeout; /* * Non-GUC global variables. */ -extern bool gp_resource_group_enable_cgroup_memory; -extern bool gp_resource_group_enable_cgroup_swap; extern bool gp_resource_group_enable_cgroup_cpuset; /* @@ -133,7 +113,6 @@ typedef enum RES_GROUP_STAT_TOTAL_QUEUED, RES_GROUP_STAT_TOTAL_QUEUE_TIME, RES_GROUP_STAT_CPU_USAGE, - RES_GROUP_STAT_MEM_USAGE, } ResGroupStatType; /* @@ -141,17 +120,17 @@ typedef enum */ typedef struct { - Oid groupid; + Oid groupid; ResGroupLimitType limittype; - ResGroupCaps caps; - ResGroupCaps oldCaps; /* last config value, alter operation need to - * check last config for recycling */ - ResGroupCap memLimitGap; + ResGroupCaps caps; + ResGroupCaps oldCaps; /* last config value, alter operation need to + * check last config for recycling */ } ResourceGroupCallbackContext; /* Shared memory and semaphores */ extern Size ResGroupShmemSize(void); extern void ResGroupControlInit(void); +extern void CGroupOpsAndInfoInit(void); extern void initCgroup(void); /* Load resource group information from catalog */ @@ -176,13 +155,6 @@ extern 
bool ResGroupIsAssigned(void); /* Retrieve statistic information of type from resource group */ extern Datum ResGroupGetStat(Oid groupId, ResGroupStatType type); -extern void ResGroupDumpMemoryInfo(void); - -/* Check the memory limit of resource group */ -extern bool ResGroupReserveMemory(int32 memoryChunks, int32 overuseChunks, bool *waiverUsed); -/* Update the memory usage of resource group */ -extern void ResGroupReleaseMemory(int32 memoryChunks); - extern void ResGroupDropFinish(const ResourceGroupCallbackContext *callbackCtx, bool isCommit); extern void ResGroupCreateOnAbort(const ResourceGroupCallbackContext *callbackCtx); @@ -198,15 +170,6 @@ extern void ResGroupCheckForDrop(Oid groupId, char *name); */ extern Oid GetMyResGroupId(void); -extern int32 ResGroupGetVmemLimitChunks(void); -extern int32 ResGroupGetVmemChunkSizeInBits(void); -extern int32 ResGroupGetMaxChunksPerQuery(void); - -/* test helper function */ -extern void ResGroupGetMemInfo(int *memLimit, int *slotQuota, int *sharedQuota); - -extern int64 ResourceGroupGetQueryMemoryLimit(void); - extern void ResGroupDumpInfo(StringInfo str); extern int ResGroupGetHostPrimaryCount(void); @@ -221,14 +184,9 @@ extern void CpusetDifference(char *cpuset1, const char *cpuset2, int len); extern bool CpusetIsEmpty(const char *cpuset); extern void SetCpusetEmpty(char *cpuset, int cpusetSize); extern bool EnsureCpusetIsAvailable(int elevel); -extern bool IsGroupInRedZone(void); -extern void ResGroupGetMemoryRunawayInfo(StringInfo str); extern Oid SessionGetResGroupId(SessionState *session); -extern int32 SessionGetResGroupGlobalShareMemUsage(SessionState *session); extern void HandleMoveResourceGroup(void); extern void ResGroupMoveQuery(int sessionId, Oid groupId, const char *groupName); -extern int32 ResGroupGetSessionMemUsage(int sessionId); -extern int32 ResGroupGetGroupAvailableMem(Oid groupId); extern Oid ResGroupGetGroupIdBySessionId(int sessionId); extern char *getCpuSetByRole(const char *cpuset); extern 
void checkCpuSetByRole(const char *cpuset); diff --git a/src/include/utils/resscheduler.h b/src/include/utils/resscheduler.h index 018250d8372..7f8d050e3ba 100644 --- a/src/include/utils/resscheduler.h +++ b/src/include/utils/resscheduler.h @@ -27,7 +27,6 @@ /* * GUC variables. */ -extern int gp_resgroup_memory_policy; extern int gp_resqueue_memory_policy; extern bool gp_log_resqueue_memory; extern int gp_resqueue_memory_policy_auto_fixed_mem; diff --git a/src/include/utils/sync_guc_name.h b/src/include/utils/sync_guc_name.h index 651fc567562..aded0b2a2e9 100644 --- a/src/include/utils/sync_guc_name.h +++ b/src/include/utils/sync_guc_name.h @@ -70,8 +70,6 @@ "gp_max_slices", "gp_motion_slice_noop", "gp_resgroup_debug_wait_queue", - "gp_resgroup_memory_policy_auto_fixed_mem", - "gp_resgroup_print_operator_memory_limits", "gp_resqueue_memory_policy_auto_fixed_mem", "gp_resqueue_print_operator_memory_limits", "gp_select_invisible", diff --git a/src/include/utils/unsync_guc_name.h b/src/include/utils/unsync_guc_name.h index a751b6a4ca9..0a3dce89896 100644 --- a/src/include/utils/unsync_guc_name.h +++ b/src/include/utils/unsync_guc_name.h @@ -267,14 +267,10 @@ "gp_reject_internal_tcp_connection", "gp_reject_percent_threshold", "gp_reraise_signal", - "gp_resgroup_memory_policy", "gp_resource_group_bypass", - "gp_resource_group_cpu_ceiling_enforcement", "gp_resource_group_cpu_limit", "gp_resource_group_cpu_priority", "gp_resource_group_enable_cgroup_version_two", - "gp_resource_group_enable_recalculate_query_mem", - "gp_resource_group_memory_limit", "gp_resource_group_queuing_timeout", "gp_resource_manager", "gp_resqueue_memory_policy", diff --git a/src/test/binary_swap/expected/resgroup_current_1_group.out b/src/test/binary_swap/expected/resgroup_current_1_group.out index c24b172a8e3..9168fd0c013 100644 --- a/src/test/binary_swap/expected/resgroup_current_1_group.out +++ b/src/test/binary_swap/expected/resgroup_current_1_group.out @@ -45,10 +45,10 @@ select rsgname from 
gp_toolkit.gp_resgroup_status rg2 (4 rows) -alter resource group rg1 set cpu_rate_limit 20; -alter resource group rg1 set cpu_rate_limit 10; +alter resource group rg1 set cpu_hard_quota_limit 20; +alter resource group rg1 set cpu_hard_quota_limit 10; drop resource group rg1; -create resource group rg1 with (cpu_rate_limit=10, memory_limit=10); -alter resource group rg2 set cpu_rate_limit 20; -alter resource group rg2 set cpu_rate_limit 10; +create resource group rg1 with (cpu_hard_quota_limit=10); +alter resource group rg2 set cpu_hard_quota_limit 20; +alter resource group rg2 set cpu_hard_quota_limit 10; -- TODO: change to the resgroup diff --git a/src/test/binary_swap/expected/resgroup_current_1_queue.out b/src/test/binary_swap/expected/resgroup_current_1_queue.out index fcba841fe1c..07e7a152c1d 100644 --- a/src/test/binary_swap/expected/resgroup_current_1_queue.out +++ b/src/test/binary_swap/expected/resgroup_current_1_queue.out @@ -37,15 +37,15 @@ select rsgname from gp_toolkit.gp_resgroup_status --------- (0 rows) -create resource group rg1 with (cpu_rate_limit=10, memory_limit=10); +create resource group rg1 with (cpu_hard_quota_limit=10, memory_limit=10); WARNING: resource group is disabled HINT: To enable set gp_resource_manager=group -alter resource group rg1 set cpu_rate_limit 20; -alter resource group rg1 set cpu_rate_limit 10; +alter resource group rg1 set cpu_hard_quota_limit 20; +alter resource group rg1 set cpu_hard_quota_limit 10; drop resource group rg1; -create resource group rg1 with (cpu_rate_limit=10, memory_limit=10); +create resource group rg1 with (cpu_hard_quota_limit=10, memory_limit=10); WARNING: resource group is disabled HINT: To enable set gp_resource_manager=group -create resource group rg2 with (cpu_rate_limit=10, memory_limit=10); +create resource group rg2 with (cpu_hard_quota_limit=10, memory_limit=10); WARNING: resource group is disabled HINT: To enable set gp_resource_manager=group diff --git 
a/src/test/binary_swap/expected/resgroup_current_3_group.out b/src/test/binary_swap/expected/resgroup_current_3_group.out index c24b172a8e3..a76f1b061d7 100644 --- a/src/test/binary_swap/expected/resgroup_current_3_group.out +++ b/src/test/binary_swap/expected/resgroup_current_3_group.out @@ -45,10 +45,10 @@ select rsgname from gp_toolkit.gp_resgroup_status rg2 (4 rows) -alter resource group rg1 set cpu_rate_limit 20; -alter resource group rg1 set cpu_rate_limit 10; +alter resource group rg1 set cpu_hard_quota_limit 20; +alter resource group rg1 set cpu_hard_quota_limit 10; drop resource group rg1; -create resource group rg1 with (cpu_rate_limit=10, memory_limit=10); -alter resource group rg2 set cpu_rate_limit 20; -alter resource group rg2 set cpu_rate_limit 10; +create resource group rg1 with (cpu_hard_quota_limit=10, memory_limit=10); +alter resource group rg2 set cpu_hard_quota_limit 20; +alter resource group rg2 set cpu_hard_quota_limit 10; -- TODO: change to the resgroup diff --git a/src/test/binary_swap/expected/resgroup_current_3_queue.out b/src/test/binary_swap/expected/resgroup_current_3_queue.out index a0de421699f..6d2c3cf41d7 100644 --- a/src/test/binary_swap/expected/resgroup_current_3_queue.out +++ b/src/test/binary_swap/expected/resgroup_current_3_queue.out @@ -41,11 +41,11 @@ select rsgname from gp_toolkit.gp_resgroup_status --------- (0 rows) -alter resource group rg1 set cpu_rate_limit 20; -alter resource group rg1 set cpu_rate_limit 10; +alter resource group rg1 set cpu_hard_quota_limit 20; +alter resource group rg1 set cpu_hard_quota_limit 10; drop resource group rg1; -create resource group rg1 with (cpu_rate_limit=10, memory_limit=10); +create resource group rg1 with (cpu_hard_quota_limit=10, memory_limit=10); WARNING: resource group is disabled HINT: To enable set gp_resource_manager=group -alter resource group rg2 set cpu_rate_limit 20; -alter resource group rg2 set cpu_rate_limit 10; +alter resource group rg2 set cpu_hard_quota_limit 20; 
+alter resource group rg2 set cpu_hard_quota_limit 10; diff --git a/src/test/binary_swap/expected/resgroup_other_2_group.out b/src/test/binary_swap/expected/resgroup_other_2_group.out index 4aca58db542..2e8f0ac8a00 100644 --- a/src/test/binary_swap/expected/resgroup_other_2_group.out +++ b/src/test/binary_swap/expected/resgroup_other_2_group.out @@ -45,10 +45,10 @@ select rsgname from gp_toolkit.gp_resgroup_status rg2 (4 rows) -alter resource group rg1 set cpu_rate_limit 20; -alter resource group rg1 set cpu_rate_limit 10; +alter resource group rg1 set cpu_hard_quota_limit 20; +alter resource group rg1 set cpu_hard_quota_limit 10; drop resource group rg1; -create resource group rg1 with (cpu_rate_limit=10, memory_limit=10); -alter resource group rg2 set cpu_rate_limit 20; -alter resource group rg2 set cpu_rate_limit 10; +create resource group rg1 with (cpu_hard_quota_limit=10, memory_limit=10); +alter resource group rg2 set cpu_hard_quota_limit 20; +alter resource group rg2 set cpu_hard_quota_limit 10; -- TODO: change to the resgroup diff --git a/src/test/binary_swap/expected/resgroup_other_2_queue.out b/src/test/binary_swap/expected/resgroup_other_2_queue.out index a0de421699f..6d2c3cf41d7 100644 --- a/src/test/binary_swap/expected/resgroup_other_2_queue.out +++ b/src/test/binary_swap/expected/resgroup_other_2_queue.out @@ -41,11 +41,11 @@ select rsgname from gp_toolkit.gp_resgroup_status --------- (0 rows) -alter resource group rg1 set cpu_rate_limit 20; -alter resource group rg1 set cpu_rate_limit 10; +alter resource group rg1 set cpu_hard_quota_limit 20; +alter resource group rg1 set cpu_hard_quota_limit 10; drop resource group rg1; -create resource group rg1 with (cpu_rate_limit=10, memory_limit=10); +create resource group rg1 with (cpu_hard_quota_limit=10, memory_limit=10); WARNING: resource group is disabled HINT: To enable set gp_resource_manager=group -alter resource group rg2 set cpu_rate_limit 20; -alter resource group rg2 set cpu_rate_limit 10; +alter 
resource group rg2 set cpu_hard_quota_limit 20; +alter resource group rg2 set cpu_hard_quota_limit 10; diff --git a/src/test/binary_swap/sql/resgroup_current_1_group.sql b/src/test/binary_swap/sql/resgroup_current_1_group.sql index cf306d05038..d8fc6b1bbcf 100644 --- a/src/test/binary_swap/sql/resgroup_current_1_group.sql +++ b/src/test/binary_swap/sql/resgroup_current_1_group.sql @@ -13,12 +13,12 @@ select rsgname from gp_toolkit.gp_resgroup_status where rsgname not like 'rg_dump_test%' order by groupid; -alter resource group rg1 set cpu_rate_limit 20; -alter resource group rg1 set cpu_rate_limit 10; +alter resource group rg1 set cpu_hard_quota_limit 20; +alter resource group rg1 set cpu_hard_quota_limit 10; drop resource group rg1; -create resource group rg1 with (cpu_rate_limit=10, memory_limit=10); +create resource group rg1 with (cpu_hard_quota_limit=10, memory_limit=10); -alter resource group rg2 set cpu_rate_limit 20; -alter resource group rg2 set cpu_rate_limit 10; +alter resource group rg2 set cpu_hard_quota_limit 20; +alter resource group rg2 set cpu_hard_quota_limit 10; -- TODO: change to the resgroup diff --git a/src/test/binary_swap/sql/resgroup_current_1_queue.sql b/src/test/binary_swap/sql/resgroup_current_1_queue.sql index 575ac0c6221..685c20cd588 100644 --- a/src/test/binary_swap/sql/resgroup_current_1_queue.sql +++ b/src/test/binary_swap/sql/resgroup_current_1_queue.sql @@ -13,10 +13,10 @@ select rsgname from gp_toolkit.gp_resgroup_status where rsgname not like 'rg_dump_test%' order by groupid; -create resource group rg1 with (cpu_rate_limit=10, memory_limit=10); -alter resource group rg1 set cpu_rate_limit 20; -alter resource group rg1 set cpu_rate_limit 10; +create resource group rg1 with (cpu_hard_quota_limit=10, memory_limit=10); +alter resource group rg1 set cpu_hard_quota_limit 20; +alter resource group rg1 set cpu_hard_quota_limit 10; drop resource group rg1; -create resource group rg1 with (cpu_rate_limit=10, memory_limit=10); +create 
resource group rg1 with (cpu_hard_quota_limit=10, memory_limit=10); -create resource group rg2 with (cpu_rate_limit=10, memory_limit=10); +create resource group rg2 with (cpu_hard_quota_limit=10, memory_limit=10); diff --git a/src/test/binary_swap/sql/resgroup_current_3_group.sql b/src/test/binary_swap/sql/resgroup_current_3_group.sql index cf306d05038..d8fc6b1bbcf 100644 --- a/src/test/binary_swap/sql/resgroup_current_3_group.sql +++ b/src/test/binary_swap/sql/resgroup_current_3_group.sql @@ -13,12 +13,12 @@ select rsgname from gp_toolkit.gp_resgroup_status where rsgname not like 'rg_dump_test%' order by groupid; -alter resource group rg1 set cpu_rate_limit 20; -alter resource group rg1 set cpu_rate_limit 10; +alter resource group rg1 set cpu_hard_quota_limit 20; +alter resource group rg1 set cpu_hard_quota_limit 10; drop resource group rg1; -create resource group rg1 with (cpu_rate_limit=10, memory_limit=10); +create resource group rg1 with (cpu_hard_quota_limit=10, memory_limit=10); -alter resource group rg2 set cpu_rate_limit 20; -alter resource group rg2 set cpu_rate_limit 10; +alter resource group rg2 set cpu_hard_quota_limit 20; +alter resource group rg2 set cpu_hard_quota_limit 10; -- TODO: change to the resgroup diff --git a/src/test/binary_swap/sql/resgroup_current_3_queue.sql b/src/test/binary_swap/sql/resgroup_current_3_queue.sql index 32968aba751..ff82b37b112 100644 --- a/src/test/binary_swap/sql/resgroup_current_3_queue.sql +++ b/src/test/binary_swap/sql/resgroup_current_3_queue.sql @@ -13,10 +13,10 @@ select rsgname from gp_toolkit.gp_resgroup_status where rsgname not like 'rg_dump_test%' order by groupid; -alter resource group rg1 set cpu_rate_limit 20; -alter resource group rg1 set cpu_rate_limit 10; +alter resource group rg1 set cpu_hard_quota_limit 20; +alter resource group rg1 set cpu_hard_quota_limit 10; drop resource group rg1; -create resource group rg1 with (cpu_rate_limit=10, memory_limit=10); +create resource group rg1 with 
(cpu_hard_quota_limit=10, memory_limit=10); -alter resource group rg2 set cpu_rate_limit 20; -alter resource group rg2 set cpu_rate_limit 10; +alter resource group rg2 set cpu_hard_quota_limit 20; +alter resource group rg2 set cpu_hard_quota_limit 10; diff --git a/src/test/binary_swap/sql/resgroup_other_2_group.sql b/src/test/binary_swap/sql/resgroup_other_2_group.sql index cf306d05038..d8fc6b1bbcf 100644 --- a/src/test/binary_swap/sql/resgroup_other_2_group.sql +++ b/src/test/binary_swap/sql/resgroup_other_2_group.sql @@ -13,12 +13,12 @@ select rsgname from gp_toolkit.gp_resgroup_status where rsgname not like 'rg_dump_test%' order by groupid; -alter resource group rg1 set cpu_rate_limit 20; -alter resource group rg1 set cpu_rate_limit 10; +alter resource group rg1 set cpu_hard_quota_limit 20; +alter resource group rg1 set cpu_hard_quota_limit 10; drop resource group rg1; -create resource group rg1 with (cpu_rate_limit=10, memory_limit=10); +create resource group rg1 with (cpu_hard_quota_limit=10, memory_limit=10); -alter resource group rg2 set cpu_rate_limit 20; -alter resource group rg2 set cpu_rate_limit 10; +alter resource group rg2 set cpu_hard_quota_limit 20; +alter resource group rg2 set cpu_hard_quota_limit 10; -- TODO: change to the resgroup diff --git a/src/test/binary_swap/sql/resgroup_other_2_queue.sql b/src/test/binary_swap/sql/resgroup_other_2_queue.sql index 32968aba751..ff82b37b112 100644 --- a/src/test/binary_swap/sql/resgroup_other_2_queue.sql +++ b/src/test/binary_swap/sql/resgroup_other_2_queue.sql @@ -13,10 +13,10 @@ select rsgname from gp_toolkit.gp_resgroup_status where rsgname not like 'rg_dump_test%' order by groupid; -alter resource group rg1 set cpu_rate_limit 20; -alter resource group rg1 set cpu_rate_limit 10; +alter resource group rg1 set cpu_hard_quota_limit 20; +alter resource group rg1 set cpu_hard_quota_limit 10; drop resource group rg1; -create resource group rg1 with (cpu_rate_limit=10, memory_limit=10); +create resource group 
rg1 with (cpu_hard_quota_limit=10, memory_limit=10); -alter resource group rg2 set cpu_rate_limit 20; -alter resource group rg2 set cpu_rate_limit 10; +alter resource group rg2 set cpu_hard_quota_limit 20; +alter resource group rg2 set cpu_hard_quota_limit 10; diff --git a/src/test/isolation2/expected/resgroup/resgroup_alter_concurrency.out b/src/test/isolation2/expected/resgroup/resgroup_alter_concurrency.out index 4775a18158c..78456a883a7 100644 --- a/src/test/isolation2/expected/resgroup/resgroup_alter_concurrency.out +++ b/src/test/isolation2/expected/resgroup/resgroup_alter_concurrency.out @@ -5,7 +5,7 @@ DROP DROP RESOURCE GROUP rg_concurrency_test; ERROR: resource group "rg_concurrency_test" does not exist -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_rate_limit=20, memory_limit=60, memory_shared_quota=0, memory_spill_ratio=10); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_hard_quota_limit=20, memory_limit=60, memory_shared_quota=0, memory_spill_ratio=10); CREATE CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; CREATE @@ -490,7 +490,7 @@ DROP ROLE role_concurrency_test; DROP DROP RESOURCE GROUP rg_concurrency_test; DROP -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=0, cpu_rate_limit=20, memory_limit=60, memory_shared_quota=0, memory_spill_ratio=10); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=0, cpu_hard_quota_limit=20, memory_limit=60, memory_shared_quota=0, memory_spill_ratio=10); CREATE CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; CREATE @@ -640,7 +640,7 @@ DROP RESOURCE GROUP rg_concurrency_test; DROP -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=0, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=0, cpu_hard_quota_limit=20, memory_limit=20); CREATE CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; CREATE @@ -667,7 +667,7 
@@ DROP RESOURCE GROUP rg_concurrency_test; ERROR: resource group "rg_concurrency_test" does not exist -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=0, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=0, cpu_hard_quota_limit=20, memory_limit=20); CREATE CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; CREATE diff --git a/src/test/isolation2/expected/resgroup/resgroup_alter_memory_spill_ratio.out b/src/test/isolation2/expected/resgroup/resgroup_alter_memory_spill_ratio.out deleted file mode 100644 index 34fb75542c8..00000000000 --- a/src/test/isolation2/expected/resgroup/resgroup_alter_memory_spill_ratio.out +++ /dev/null @@ -1,74 +0,0 @@ --- start_ignore -DROP RESOURCE GROUP rg_spill_test; -ERROR: resource group "rg_spill_test" does not exist --- end_ignore -CREATE RESOURCE GROUP rg_spill_test WITH (concurrency=10, cpu_rate_limit=20, memory_limit=20, memory_shared_quota=20, memory_spill_ratio=10); -CREATE - -CREATE OR REPLACE VIEW rg_spill_status AS SELECT groupname, memory_shared_quota, memory_spill_ratio FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_spill_test'; -CREATE - --- ALTER MEMORY_SPILL_RATIO - -SELECT * FROM rg_spill_status; - groupname | memory_shared_quota | memory_spill_ratio ----------------+---------------------+-------------------- - rg_spill_test | 20 | 10 -(1 row) - --- positive -ALTER RESOURCE GROUP rg_spill_test SET MEMORY_SPILL_RATIO 20; -ALTER -SELECT * FROM rg_spill_status; - groupname | memory_shared_quota | memory_spill_ratio ----------------+---------------------+-------------------- - rg_spill_test | 20 | 20 -(1 row) - --- positive, memory_spill_ratio range is [0, 100] -ALTER RESOURCE GROUP rg_spill_test SET MEMORY_SPILL_RATIO 0; -ALTER -SELECT * FROM rg_spill_status; - groupname | memory_shared_quota | memory_spill_ratio ----------------+---------------------+-------------------- - rg_spill_test | 20 | 0 -(1 row) - --- positive: 
no limit on the sum of shared and spill -ALTER RESOURCE GROUP rg_spill_test SET MEMORY_SPILL_RATIO 81; -ALTER -SELECT * FROM rg_spill_status; - groupname | memory_shared_quota | memory_spill_ratio ----------------+---------------------+-------------------- - rg_spill_test | 20 | 81 -(1 row) - --- negative: memory_spill_ratio is invalid -ALTER RESOURCE GROUP rg_spill_test SET MEMORY_SPILL_RATIO 20.0; -ERROR: syntax error at or near "20.0" -LINE 1: ...ER RESOURCE GROUP rg_spill_test SET MEMORY_SPILL_RATIO 20.0; - ^ -ALTER RESOURCE GROUP rg_spill_test SET MEMORY_SPILL_RATIO a; -ERROR: syntax error at or near "a" -LINE 1: ALTER RESOURCE GROUP rg_spill_test SET MEMORY_SPILL_RATIO a; - ^ -SELECT * FROM rg_spill_status; - groupname | memory_shared_quota | memory_spill_ratio ----------------+---------------------+-------------------- - rg_spill_test | 20 | 81 -(1 row) - --- negative: memory_spill_ratio is larger than RESGROUP_MAX_MEMORY_SPILL_RATIO -ALTER RESOURCE GROUP rg_spill_test SET MEMORY_SPILL_RATIO 101; -ERROR: memory_spill_ratio range is [0, 100] -SELECT * FROM rg_spill_status; - groupname | memory_shared_quota | memory_spill_ratio ----------------+---------------------+-------------------- - rg_spill_test | 20 | 81 -(1 row) - --- cleanup -DROP VIEW rg_spill_status; -DROP -DROP RESOURCE GROUP rg_spill_test; -DROP diff --git a/src/test/isolation2/expected/resgroup/resgroup_assign_slot_fail.out b/src/test/isolation2/expected/resgroup/resgroup_assign_slot_fail.out index da279543021..59c1672358b 100644 --- a/src/test/isolation2/expected/resgroup/resgroup_assign_slot_fail.out +++ b/src/test/isolation2/expected/resgroup/resgroup_assign_slot_fail.out @@ -7,7 +7,7 @@ DROP DROP RESOURCE GROUP rg_test; ERROR: resource group "rg_test" does not exist -- end_ignore -CREATE RESOURCE GROUP rg_test WITH (concurrency=2, cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP rg_test WITH (concurrency=2, cpu_hard_quota_limit=10); CREATE CREATE ROLE role_test RESOURCE GROUP 
rg_test; CREATE diff --git a/src/test/isolation2/expected/resgroup/resgroup_bypass_memory_limit.out b/src/test/isolation2/expected/resgroup/resgroup_bypass_memory_limit.out deleted file mode 100644 index 1f2c8e885c5..00000000000 --- a/src/test/isolation2/expected/resgroup/resgroup_bypass_memory_limit.out +++ /dev/null @@ -1,960 +0,0 @@ --- --- set timezone will accumulate the memory usage in session. --- here is used to test bypass memory limit is query level instead of session level. --- -SET TIMEZONE TO 'Japan'; -SET -SET TIMEZONE TO 'Portugal'; -SET -SET TIMEZONE TO 'Canada/Eastern'; -SET -SET TIMEZONE TO 'Canada/Central'; -SET -SET TIMEZONE TO 'Canada/Newfoundland'; -SET -SET TIMEZONE TO 'Canada/Mountain'; -SET -SET TIMEZONE TO 'Canada/Saskatchewan'; -SET -SET TIMEZONE TO 'Canada/Yukon'; -SET -SET TIMEZONE TO 'Canada/Pacific'; -SET -SET TIMEZONE TO 'Canada/Atlantic'; -SET -SET TIMEZONE TO 'CET'; -SET -SET TIMEZONE TO 'ROK'; -SET -SET TIMEZONE TO 'Arctic/Longyearbyen'; -SET -SET TIMEZONE TO 'PRC'; -SET -SET TIMEZONE TO 'GMT-0'; -SET -SET TIMEZONE TO 'Iran'; -SET -SET TIMEZONE TO 'GB-Eire'; -SET -SET TIMEZONE TO 'Jamaica'; -SET -SET TIMEZONE TO 'Europe/Tiraspol'; -SET -SET TIMEZONE TO 'Europe/Vaduz'; -SET -SET TIMEZONE TO 'Europe/Berlin'; -SET -SET TIMEZONE TO 'Europe/San_Marino'; -SET -SET TIMEZONE TO 'Europe/Guernsey'; -SET -SET TIMEZONE TO 'Europe/Ljubljana'; -SET -SET TIMEZONE TO 'Europe/Simferopol'; -SET -SET TIMEZONE TO 'Europe/Belgrade'; -SET -SET TIMEZONE TO 'Europe/Chisinau'; -SET -SET TIMEZONE TO 'Europe/London'; -SET -SET TIMEZONE TO 'Europe/Vatican'; -SET -SET TIMEZONE TO 'Europe/Skopje'; -SET -SET TIMEZONE TO 'Europe/Saratov'; -SET -SET TIMEZONE TO 'Europe/Jersey'; -SET -SET TIMEZONE TO 'Europe/Samara'; -SET -SET TIMEZONE TO 'Europe/Oslo'; -SET -SET TIMEZONE TO 'Europe/Helsinki'; -SET -SET TIMEZONE TO 'Europe/Luxembourg'; -SET -SET TIMEZONE TO 'Europe/Bucharest'; -SET -SET TIMEZONE TO 'Europe/Podgorica'; -SET -SET TIMEZONE TO 'Europe/Madrid'; -SET 
-SET TIMEZONE TO 'Europe/Sarajevo'; -SET -SET TIMEZONE TO 'Europe/Busingen'; -SET -SET TIMEZONE TO 'Europe/Monaco'; -SET -SET TIMEZONE TO 'Europe/Belfast'; -SET -SET TIMEZONE TO 'Europe/Zagreb'; -SET -SET TIMEZONE TO 'Europe/Warsaw'; -SET -SET TIMEZONE TO 'Europe/Sofia'; -SET -SET TIMEZONE TO 'Europe/Tallinn'; -SET -SET TIMEZONE TO 'Europe/Brussels'; -SET -SET TIMEZONE TO 'Europe/Isle_of_Man'; -SET -SET TIMEZONE TO 'Europe/Stockholm'; -SET -SET TIMEZONE TO 'Europe/Lisbon'; -SET -SET TIMEZONE TO 'Europe/Istanbul'; -SET -SET TIMEZONE TO 'Europe/Copenhagen'; -SET -SET TIMEZONE TO 'Europe/Tirane'; -SET -SET TIMEZONE TO 'Europe/Dublin'; -SET -SET TIMEZONE TO 'Europe/Gibraltar'; -SET -SET TIMEZONE TO 'Europe/Athens'; -SET -SET TIMEZONE TO 'Europe/Zurich'; -SET -SET TIMEZONE TO 'Europe/Vilnius'; -SET -SET TIMEZONE TO 'Europe/Malta'; -SET -SET TIMEZONE TO 'Europe/Riga'; -SET -SET TIMEZONE TO 'Europe/Vienna'; -SET -SET TIMEZONE TO 'Europe/Moscow'; -SET -SET TIMEZONE TO 'Europe/Ulyanovsk'; -SET -SET TIMEZONE TO 'Europe/Rome'; -SET -SET TIMEZONE TO 'Europe/Kirov'; -SET -SET TIMEZONE TO 'Europe/Uzhgorod'; -SET -SET TIMEZONE TO 'Europe/Kaliningrad'; -SET -SET TIMEZONE TO 'Europe/Mariehamn'; -SET -SET TIMEZONE TO 'Europe/Budapest'; -SET -SET TIMEZONE TO 'Europe/Astrakhan'; -SET -SET TIMEZONE TO 'Europe/Volgograd'; -SET -SET TIMEZONE TO 'Europe/Kiev'; -SET -SET TIMEZONE TO 'Europe/Paris'; -SET -SET TIMEZONE TO 'Europe/Andorra'; -SET -SET TIMEZONE TO 'Europe/Amsterdam'; -SET -SET TIMEZONE TO 'Europe/Nicosia'; -SET -SET TIMEZONE TO 'Europe/Bratislava'; -SET -SET TIMEZONE TO 'Europe/Minsk'; -SET -SET TIMEZONE TO 'Europe/Prague'; -SET -SET TIMEZONE TO 'Europe/Zaporozhye'; -SET -SET TIMEZONE TO 'Egypt'; -SET -SET TIMEZONE TO 'Zulu'; -SET -SET TIMEZONE TO 'Etc/GMT+6'; -SET -SET TIMEZONE TO 'Etc/GMT-8'; -SET -SET TIMEZONE TO 'Etc/GMT-0'; -SET -SET TIMEZONE TO 'Etc/GMT+3'; -SET -SET TIMEZONE TO 'Etc/GMT-12'; -SET -SET TIMEZONE TO 'Etc/GMT+7'; -SET -SET TIMEZONE TO 'Etc/GMT-14'; -SET -SET 
TIMEZONE TO 'Etc/GMT-10'; -SET -SET TIMEZONE TO 'Etc/Zulu'; -SET -SET TIMEZONE TO 'Etc/GMT-4'; -SET -SET TIMEZONE TO 'Etc/GMT+5'; -SET -SET TIMEZONE TO 'Etc/GMT'; -SET -SET TIMEZONE TO 'Etc/GMT-11'; -SET -SET TIMEZONE TO 'Etc/GMT-5'; -SET -SET TIMEZONE TO 'Etc/GMT+1'; -SET -SET TIMEZONE TO 'Etc/GMT+10'; -SET -SET TIMEZONE TO 'Etc/UCT'; -SET -SET TIMEZONE TO 'Etc/GMT+11'; -SET -SET TIMEZONE TO 'Etc/GMT-3'; -SET -SET TIMEZONE TO 'Etc/Greenwich'; -SET -SET TIMEZONE TO 'Etc/GMT-13'; -SET -SET TIMEZONE TO 'Etc/GMT-9'; -SET -SET TIMEZONE TO 'Etc/GMT-7'; -SET -SET TIMEZONE TO 'Etc/GMT-6'; -SET -SET TIMEZONE TO 'Etc/GMT+4'; -SET -SET TIMEZONE TO 'Etc/GMT+2'; -SET -SET TIMEZONE TO 'Etc/Universal'; -SET -SET TIMEZONE TO 'Etc/GMT+9'; -SET -SET TIMEZONE TO 'Etc/GMT+0'; -SET -SET TIMEZONE TO 'Etc/GMT-2'; -SET -SET TIMEZONE TO 'Etc/GMT+12'; -SET -SET TIMEZONE TO 'Etc/GMT-1'; -SET -SET TIMEZONE TO 'Etc/UTC'; -SET -SET TIMEZONE TO 'Etc/GMT+8'; -SET -SET TIMEZONE TO 'Etc/GMT0'; -SET -SET TIMEZONE TO 'Australia/Currie'; -SET -SET TIMEZONE TO 'Australia/Lindeman'; -SET -SET TIMEZONE TO 'Australia/Melbourne'; -SET -SET TIMEZONE TO 'Australia/Yancowinna'; -SET -SET TIMEZONE TO 'Australia/Eucla'; -SET -SET TIMEZONE TO 'Australia/South'; -SET -SET TIMEZONE TO 'Australia/Lord_Howe'; -SET -SET TIMEZONE TO 'Australia/Perth'; -SET -SET TIMEZONE TO 'Australia/Canberra'; -SET -SET TIMEZONE TO 'Australia/Hobart'; -SET -SET TIMEZONE TO 'Australia/North'; -SET -SET TIMEZONE TO 'Australia/Broken_Hill'; -SET -SET TIMEZONE TO 'Australia/NSW'; -SET -SET TIMEZONE TO 'Australia/Victoria'; -SET -SET TIMEZONE TO 'Australia/Adelaide'; -SET -SET TIMEZONE TO 'Australia/Queensland'; -SET -SET TIMEZONE TO 'Australia/Darwin'; -SET -SET TIMEZONE TO 'Australia/West'; -SET -SET TIMEZONE TO 'Australia/LHI'; -SET -SET TIMEZONE TO 'Australia/ACT'; -SET -SET TIMEZONE TO 'Australia/Sydney'; -SET -SET TIMEZONE TO 'Australia/Brisbane'; -SET -SET TIMEZONE TO 'Australia/Tasmania'; -SET -SET TIMEZONE TO 'W-SU'; -SET -SET 
TIMEZONE TO 'Africa/Kigali'; -SET -SET TIMEZONE TO 'Africa/Dar_es_Salaam'; -SET -SET TIMEZONE TO 'Africa/Niamey'; -SET -SET TIMEZONE TO 'Africa/Brazzaville'; -SET -SET TIMEZONE TO 'Africa/Addis_Ababa'; -SET -SET TIMEZONE TO 'Africa/Kampala'; -SET -SET TIMEZONE TO 'Africa/Kinshasa'; -SET -SET TIMEZONE TO 'Africa/Lagos'; -SET -SET TIMEZONE TO 'Africa/Accra'; -SET -SET TIMEZONE TO 'Africa/Bangui'; -SET -SET TIMEZONE TO 'Africa/Maputo'; -SET -SET TIMEZONE TO 'Africa/Asmara'; -SET -SET TIMEZONE TO 'Africa/Juba'; -SET -SET TIMEZONE TO 'Africa/Tunis'; -SET -SET TIMEZONE TO 'Africa/Bissau'; -SET -SET TIMEZONE TO 'Africa/Freetown'; -SET -SET TIMEZONE TO 'Africa/Tripoli'; -SET -SET TIMEZONE TO 'Africa/Windhoek'; -SET -SET TIMEZONE TO 'Africa/Casablanca'; -SET -SET TIMEZONE TO 'Africa/Mbabane'; -SET -SET TIMEZONE TO 'Africa/Harare'; -SET -SET TIMEZONE TO 'Africa/Mogadishu'; -SET -SET TIMEZONE TO 'Africa/Banjul'; -SET -SET TIMEZONE TO 'Africa/Djibouti'; -SET -SET TIMEZONE TO 'Africa/Malabo'; -SET -SET TIMEZONE TO 'Africa/Nouakchott'; -SET -SET TIMEZONE TO 'Africa/Lubumbashi'; -SET -SET TIMEZONE TO 'Africa/Luanda'; -SET -SET TIMEZONE TO 'Africa/Bamako'; -SET -SET TIMEZONE TO 'Africa/Nairobi'; -SET -SET TIMEZONE TO 'Africa/Lusaka'; -SET -SET TIMEZONE TO 'Africa/Ouagadougou'; -SET -SET TIMEZONE TO 'Africa/Asmera'; -SET -SET TIMEZONE TO 'Africa/Douala'; -SET -SET TIMEZONE TO 'Africa/Dakar'; -SET -SET TIMEZONE TO 'Africa/Khartoum'; -SET -SET TIMEZONE TO 'Africa/Libreville'; -SET -SET TIMEZONE TO 'Africa/Maseru'; -SET -SET TIMEZONE TO 'Africa/Lome'; -SET -SET TIMEZONE TO 'Africa/Abidjan'; -SET -SET TIMEZONE TO 'Africa/Ceuta'; -SET -SET TIMEZONE TO 'Africa/El_Aaiun'; -SET -SET TIMEZONE TO 'Africa/Algiers'; -SET -SET TIMEZONE TO 'Africa/Ndjamena'; -SET -SET TIMEZONE TO 'Africa/Gaborone'; -SET -SET TIMEZONE TO 'Africa/Blantyre'; -SET -SET TIMEZONE TO 'Africa/Sao_Tome'; -SET -SET TIMEZONE TO 'Africa/Monrovia'; -SET -SET TIMEZONE TO 'Africa/Johannesburg'; -SET -SET TIMEZONE TO 
'Africa/Timbuktu'; -SET -SET TIMEZONE TO 'Africa/Cairo'; -SET -SET TIMEZONE TO 'Africa/Porto-Novo'; -SET -SET TIMEZONE TO 'Africa/Bujumbura'; -SET -SET TIMEZONE TO 'Africa/Conakry'; -SET -SET TIMEZONE TO 'PST8PDT'; -SET -SET TIMEZONE TO 'Indian/Maldives'; -SET -SET TIMEZONE TO 'Indian/Mahe'; -SET -SET TIMEZONE TO 'Indian/Christmas'; -SET -SET TIMEZONE TO 'Indian/Mauritius'; -SET -SET TIMEZONE TO 'Indian/Chagos'; -SET -SET TIMEZONE TO 'Indian/Mayotte'; -SET -SET TIMEZONE TO 'Indian/Reunion'; -SET -SET TIMEZONE TO 'Indian/Antananarivo'; -SET -SET TIMEZONE TO 'Indian/Kerguelen'; -SET -SET TIMEZONE TO 'Indian/Cocos'; -SET -SET TIMEZONE TO 'Indian/Comoro'; -SET -SET TIMEZONE TO 'MET'; -SET -SET TIMEZONE TO 'ROC'; -SET -SET TIMEZONE TO 'EET'; -SET -SET TIMEZONE TO 'GMT'; -SET -SET TIMEZONE TO 'EST'; -SET -SET TIMEZONE TO 'Hongkong'; -SET -SET TIMEZONE TO 'Turkey'; -SET -SET TIMEZONE TO 'Iceland'; -SET -SET TIMEZONE TO 'Poland'; -SET -SET TIMEZONE TO 'GB'; -SET -SET TIMEZONE TO 'Israel'; -SET -SET TIMEZONE TO 'UCT'; -SET -SET TIMEZONE TO 'Navajo'; -SET -SET TIMEZONE TO 'Greenwich'; -SET -SET TIMEZONE TO 'Antarctica/Davis'; -SET -SET TIMEZONE TO 'Antarctica/Casey'; -SET -SET TIMEZONE TO 'Antarctica/Troll'; -SET -SET TIMEZONE TO 'Antarctica/DumontDUrville'; -SET -SET TIMEZONE TO 'Antarctica/South_Pole'; -SET -SET TIMEZONE TO 'Antarctica/Vostok'; -SET -SET TIMEZONE TO 'Antarctica/Syowa'; -SET -SET TIMEZONE TO 'Antarctica/Mawson'; -SET -SET TIMEZONE TO 'Antarctica/McMurdo'; -SET -SET TIMEZONE TO 'Antarctica/Palmer'; -SET -SET TIMEZONE TO 'Antarctica/Macquarie'; -SET -SET TIMEZONE TO 'Antarctica/Rothera'; -SET -SET TIMEZONE TO 'CST6CDT'; -SET -SET TIMEZONE TO 'Libya'; -SET -SET TIMEZONE TO 'Kwajalein'; -SET -SET TIMEZONE TO 'Cuba'; -SET -SET TIMEZONE TO 'Mexico/BajaNorte'; -SET -SET TIMEZONE TO 'Mexico/General'; -SET -SET TIMEZONE TO 'Mexico/BajaSur'; -SET -SET TIMEZONE TO 'WET'; -SET -SET TIMEZONE TO 'Singapore'; -SET -SET TIMEZONE TO 'Brazil/DeNoronha'; -SET -SET TIMEZONE TO 
'Brazil/Acre'; -SET -SET TIMEZONE TO 'Brazil/West'; -SET -SET TIMEZONE TO 'Brazil/East'; -SET -SET TIMEZONE TO 'MST7MDT'; -SET -SET TIMEZONE TO 'US/Aleutian'; -SET -SET TIMEZONE TO 'US/Samoa'; -SET -SET TIMEZONE TO 'US/Eastern'; -SET -SET TIMEZONE TO 'US/Alaska'; -SET -SET TIMEZONE TO 'US/Indiana-Starke'; -SET -SET TIMEZONE TO 'US/Michigan'; -SET -SET TIMEZONE TO 'US/Central'; -SET -SET TIMEZONE TO 'US/East-Indiana'; -SET -SET TIMEZONE TO 'US/Mountain'; -SET -SET TIMEZONE TO 'US/Hawaii'; -SET -SET TIMEZONE TO 'US/Pacific'; -SET -SET TIMEZONE TO 'US/Arizona'; -SET -SET TIMEZONE TO 'Universal'; -SET -SET TIMEZONE TO 'Asia/Dushanbe'; -SET -SET TIMEZONE TO 'Asia/Oral'; -SET -SET TIMEZONE TO 'Asia/Magadan'; -SET -SET TIMEZONE TO 'Asia/Hong_Kong'; -SET -SET TIMEZONE TO 'Asia/Aden'; -SET -SET TIMEZONE TO 'Asia/Tomsk'; -SET -SET TIMEZONE TO 'Asia/Aqtobe'; -SET -SET TIMEZONE TO 'Asia/Pontianak'; -SET -SET TIMEZONE TO 'Asia/Kuching'; -SET -SET TIMEZONE TO 'Asia/Tbilisi'; -SET -SET TIMEZONE TO 'Asia/Novosibirsk'; -SET -SET TIMEZONE TO 'Asia/Chita'; -SET -SET TIMEZONE TO 'Asia/Hebron'; -SET -SET TIMEZONE TO 'Asia/Choibalsan'; -SET -SET TIMEZONE TO 'Asia/Qyzylorda'; -SET -SET TIMEZONE TO 'Asia/Jakarta'; -SET -SET TIMEZONE TO 'Asia/Colombo'; -SET -SET TIMEZONE TO 'Asia/Dili'; -SET -SET TIMEZONE TO 'Asia/Thimphu'; -SET -SET TIMEZONE TO 'Asia/Tashkent'; -SET -SET TIMEZONE TO 'Asia/Ujung_Pandang'; -SET -SET TIMEZONE TO 'Asia/Ulaanbaatar'; -SET -SET TIMEZONE TO 'Asia/Jerusalem'; -SET -SET TIMEZONE TO 'Asia/Pyongyang'; -SET -SET TIMEZONE TO 'Asia/Vladivostok'; -SET -SET TIMEZONE TO 'Asia/Samarkand'; -SET -SET TIMEZONE TO 'Asia/Beirut'; -SET -SET TIMEZONE TO 'Asia/Shanghai'; -SET -SET TIMEZONE TO 'Asia/Kabul'; -SET -SET TIMEZONE TO 'Asia/Bangkok'; -SET -SET TIMEZONE TO 'Asia/Almaty'; -SET -SET TIMEZONE TO 'Asia/Kathmandu'; -SET -SET TIMEZONE TO 'Asia/Ust-Nera'; -SET -SET TIMEZONE TO 'Asia/Yangon'; -SET -SET TIMEZONE TO 'Asia/Novokuznetsk'; -SET -SET TIMEZONE TO 'Asia/Qatar'; -SET -SET 
TIMEZONE TO 'Asia/Baghdad'; -SET -SET TIMEZONE TO 'Asia/Srednekolymsk'; -SET -SET TIMEZONE TO 'Asia/Hovd'; -SET -SET TIMEZONE TO 'Asia/Istanbul'; -SET -SET TIMEZONE TO 'Asia/Omsk'; -SET -SET TIMEZONE TO 'Asia/Macau'; -SET -SET TIMEZONE TO 'Asia/Yekaterinburg'; -SET -SET TIMEZONE TO 'Asia/Vientiane'; -SET -SET TIMEZONE TO 'Asia/Famagusta'; -SET -SET TIMEZONE TO 'Asia/Urumqi'; -SET -SET TIMEZONE TO 'Asia/Kuwait'; -SET -SET TIMEZONE TO 'Asia/Dhaka'; -SET -SET TIMEZONE TO 'Asia/Ulan_Bator'; -SET -SET TIMEZONE TO 'Asia/Dubai'; -SET -SET TIMEZONE TO 'Asia/Saigon'; -SET -SET TIMEZONE TO 'Asia/Muscat'; -SET -SET TIMEZONE TO 'Asia/Tehran'; -SET -SET TIMEZONE TO 'Asia/Ho_Chi_Minh'; -SET -SET TIMEZONE TO 'Asia/Aqtau'; -SET -SET TIMEZONE TO 'Asia/Bishkek'; -SET -SET TIMEZONE TO 'Asia/Kashgar'; -SET -SET TIMEZONE TO 'Asia/Gaza'; -SET -SET TIMEZONE TO 'Asia/Riyadh'; -SET -SET TIMEZONE TO 'Asia/Ashkhabad'; -SET -SET TIMEZONE TO 'Asia/Khandyga'; -SET -SET TIMEZONE TO 'Asia/Anadyr'; -SET -SET TIMEZONE TO 'Asia/Brunei'; -SET -SET TIMEZONE TO 'Asia/Phnom_Penh'; -SET -SET TIMEZONE TO 'Asia/Baku'; -SET -SET TIMEZONE TO 'Asia/Sakhalin'; -SET -SET TIMEZONE TO 'Asia/Atyrau'; -SET -SET TIMEZONE TO 'Asia/Ashgabat'; -SET -SET TIMEZONE TO 'Asia/Thimbu'; -SET -SET TIMEZONE TO 'Asia/Damascus'; -SET -SET TIMEZONE TO 'Asia/Kolkata'; -SET -SET TIMEZONE TO 'Asia/Jayapura'; -SET -SET TIMEZONE TO 'Asia/Tokyo'; -SET -SET TIMEZONE TO 'Asia/Katmandu'; -SET -SET TIMEZONE TO 'Asia/Bahrain'; -SET -SET TIMEZONE TO 'Asia/Tel_Aviv'; -SET -SET TIMEZONE TO 'Asia/Singapore'; -SET -SET TIMEZONE TO 'Asia/Krasnoyarsk'; -SET -SET TIMEZONE TO 'Asia/Seoul'; -SET -SET TIMEZONE TO 'Asia/Barnaul'; -SET -SET TIMEZONE TO 'Asia/Yakutsk'; -SET -SET TIMEZONE TO 'Asia/Irkutsk'; -SET -SET TIMEZONE TO 'Asia/Macao'; -SET -SET TIMEZONE TO 'Asia/Taipei'; -SET -SET TIMEZONE TO 'Asia/Kamchatka'; -SET -SET TIMEZONE TO 'Asia/Yerevan'; -SET -SET TIMEZONE TO 'Asia/Harbin'; -SET -SET TIMEZONE TO 'Asia/Manila'; -SET -SET TIMEZONE TO 
'Asia/Qostanay'; -SET -SET TIMEZONE TO 'Asia/Amman'; -SET -SET TIMEZONE TO 'Asia/Nicosia'; -SET -SET TIMEZONE TO 'Asia/Karachi'; -SET -SET TIMEZONE TO 'Asia/Rangoon'; -SET -SET TIMEZONE TO 'Asia/Chungking'; -SET -SET TIMEZONE TO 'Asia/Chongqing'; -SET -SET TIMEZONE TO 'Asia/Makassar'; -SET -SET TIMEZONE TO 'Asia/Dacca'; -SET -SET TIMEZONE TO 'Asia/Kuala_Lumpur'; -SET -SET TIMEZONE TO 'Asia/Calcutta'; -SET -SET TIMEZONE TO 'EST5EDT'; -SET -SET TIMEZONE TO 'GMT+0'; -SET -SET TIMEZONE TO 'Pacific/Wake'; -SET -SET TIMEZONE TO 'Pacific/Samoa'; -SET -SET TIMEZONE TO 'Pacific/Efate'; -SET -SET TIMEZONE TO 'Pacific/Niue'; -SET -SET TIMEZONE TO 'Pacific/Pago_Pago'; -SET -SET TIMEZONE TO 'Pacific/Pitcairn'; -SET -SET TIMEZONE TO 'Pacific/Saipan'; -SET -SET TIMEZONE TO 'Pacific/Norfolk'; -SET -SET TIMEZONE TO 'Pacific/Yap'; -SET -SET TIMEZONE TO 'Pacific/Enderbury'; -SET -SET TIMEZONE TO 'Pacific/Port_Moresby'; -SET -SET TIMEZONE TO 'Pacific/Funafuti'; -SET -SET TIMEZONE TO 'Pacific/Apia'; -SET -SET TIMEZONE TO 'Pacific/Rarotonga'; -SET -SET TIMEZONE TO 'Pacific/Ponape'; -SET -SET TIMEZONE TO 'Pacific/Wallis'; -SET -SET TIMEZONE TO 'Pacific/Johnston'; -SET -SET TIMEZONE TO 'Pacific/Guam'; -SET -SET TIMEZONE TO 'Pacific/Guadalcanal'; -SET -SET TIMEZONE TO 'Pacific/Chatham'; -SET -SET TIMEZONE TO 'Pacific/Truk'; -SET -SET TIMEZONE TO 'Pacific/Fakaofo'; -SET -SET TIMEZONE TO 'Pacific/Kosrae'; -SET -SET TIMEZONE TO 'Pacific/Kiritimati'; -SET -SET TIMEZONE TO 'Pacific/Gambier'; -SET -SET TIMEZONE TO 'Pacific/Kwajalein'; -SET -SET TIMEZONE TO 'Pacific/Midway'; -SET -SET TIMEZONE TO 'Pacific/Pohnpei'; -SET -SET TIMEZONE TO 'Pacific/Majuro'; -SET -SET TIMEZONE TO 'Pacific/Tahiti'; -SET -SET TIMEZONE TO 'Pacific/Fiji'; -SET -SET TIMEZONE TO 'Pacific/Tongatapu'; -SET -SET TIMEZONE TO 'Pacific/Palau'; -SET -SET TIMEZONE TO 'Pacific/Galapagos'; -SET -SET TIMEZONE TO 'Pacific/Marquesas'; -SET -SET TIMEZONE TO 'Pacific/Bougainville'; -SET -SET TIMEZONE TO 'Pacific/Honolulu'; -SET -SET 
TIMEZONE TO 'Pacific/Noumea'; -SET -SET TIMEZONE TO 'Pacific/Auckland'; -SET -SET TIMEZONE TO 'Pacific/Chuuk'; -SET -SET TIMEZONE TO 'Pacific/Nauru'; -SET -SET TIMEZONE TO 'Pacific/Easter'; -SET -SET TIMEZONE TO 'Pacific/Tarawa'; -SET -SET TIMEZONE TO 'America/Detroit'; -SET -SET TIMEZONE TO 'America/Barbados'; -SET -SET TIMEZONE TO 'America/North_Dakota/New_Salem'; -SET -SET TIMEZONE TO 'America/North_Dakota/Center'; -SET -SET TIMEZONE TO 'America/North_Dakota/Beulah'; -SET -SET TIMEZONE TO 'America/Thunder_Bay'; -SET -SET TIMEZONE TO 'America/Panama'; -SET -SET TIMEZONE TO 'America/Cancun'; -SET -SET TIMEZONE TO 'America/Santo_Domingo'; -SET -SET TIMEZONE TO 'America/Matamoros'; -SET -SET TIMEZONE TO 'America/Port-au-Prince'; -SET -SET TIMEZONE TO 'America/Atikokan'; -SET -SET TIMEZONE TO 'America/Knox_IN'; -SET -SET TIMEZONE TO 'America/Cayenne'; -SET -SET TIMEZONE TO 'America/Kralendijk'; -SET -SET TIMEZONE TO 'America/Iqaluit'; -SET -SET TIMEZONE TO 'America/Paramaribo'; -SET -SET TIMEZONE TO 'America/Aruba'; -SET -SET TIMEZONE TO 'America/Vancouver'; -SET -SET TIMEZONE TO 'America/Noronha'; -SET -SET TIMEZONE TO 'America/Ojinaga'; -SET -SET TIMEZONE TO 'America/Atka'; -SET -SET TIMEZONE TO 'America/St_Johns'; -SET -SET TIMEZONE TO 'America/Mexico_City'; -SET -SET TIMEZONE TO 'America/Rosario'; -SET -SET TIMEZONE TO 'America/Nipigon'; -SET -SET TIMEZONE TO 'America/Costa_Rica'; -SET -SET TIMEZONE TO 'America/Regina'; -SET -SET TIMEZONE TO 'America/La_Paz'; -SET -SET TIMEZONE TO 'America/Jamaica'; -SET -SET TIMEZONE TO 'America/Anchorage'; -SET -SET TIMEZONE TO 'America/St_Kitts'; -SET -SET TIMEZONE TO 'America/Godthab'; -SET -SET TIMEZONE TO 'America/Swift_Current'; -SET -SET TIMEZONE TO 'America/Danmarkshavn'; -SET -SET TIMEZONE TO 'America/Phoenix'; -SET -SET TIMEZONE TO 'America/Lower_Princes'; -SET -SET TIMEZONE TO 'America/Yakutat'; -SET -SET TIMEZONE TO 'America/Menominee'; -SET -SET TIMEZONE TO 'America/Bahia'; -SET -SET TIMEZONE TO 
'America/Montserrat'; -SET -SET TIMEZONE TO 'America/Miquelon'; -SET -SET TIMEZONE TO 'America/Anguilla'; -SET -SET TIMEZONE TO 'America/Grand_Turk'; -SET -SET TIMEZONE TO 'America/Coral_Harbour'; -SET -SET TIMEZONE TO 'America/Glace_Bay'; -SET -SET TIMEZONE TO 'America/Boa_Vista'; -SET -SET TIMEZONE TO 'America/Dominica'; -SET -SET TIMEZONE TO 'America/Goose_Bay'; -SET -SET TIMEZONE TO 'America/Caracas'; -SET -SET TIMEZONE TO 'America/Boise'; -SET -SET TIMEZONE TO 'America/Cayman'; -SET -SET TIMEZONE TO 'America/Puerto_Rico'; -SET -SET TIMEZONE TO 'America/Guyana'; -SET -SET TIMEZONE TO 'America/Metlakatla'; -SET -SET TIMEZONE TO 'America/Cuiaba'; -SET -SET TIMEZONE TO 'America/Virgin'; -SET -SET TIMEZONE TO 'America/Campo_Grande'; -SET -SET TIMEZONE TO 'America/Maceio'; -SET -SET TIMEZONE TO 'America/Scoresbysund'; -SET -SET TIMEZONE TO 'America/Guadeloupe'; -SET -SET TIMEZONE TO 'America/Indiana/Vevay'; -SET -SET TIMEZONE TO 'America/Indiana/Winamac'; -SET -SET TIMEZONE TO 'America/Indiana/Petersburg'; -SET -SET TIMEZONE TO 'America/Indiana/Marengo'; -SET -SET TIMEZONE TO 'America/Indiana/Knox'; -SET -SET TIMEZONE TO 'America/Indiana/Indianapolis'; -SET -SET TIMEZONE TO 'America/Indiana/Vincennes'; -SET -SET TIMEZONE TO 'America/Indiana/Tell_City'; -SET -SET TIMEZONE TO 'America/Manaus'; -SET -SET TIMEZONE TO 'America/Resolute'; -SET -SET TIMEZONE TO 'America/Pangnirtung'; -SET -SET TIMEZONE TO 'America/Winnipeg'; -SET diff --git a/src/test/isolation2/expected/resgroup/resgroup_cancel_terminate_concurrency.out b/src/test/isolation2/expected/resgroup/resgroup_cancel_terminate_concurrency.out index d1aa7e0703a..cbc136a6f97 100644 --- a/src/test/isolation2/expected/resgroup/resgroup_cancel_terminate_concurrency.out +++ b/src/test/isolation2/expected/resgroup/resgroup_cancel_terminate_concurrency.out @@ -9,7 +9,7 @@ ERROR: resource group "rg_concurrency_test" does not exist CREATE OR REPLACE VIEW rg_concurrency_view AS SELECT wait_event_type IS NOT NULL as waiting, 
wait_event_type, state, query, rsgname FROM pg_stat_activity WHERE rsgname='rg_concurrency_test'; CREATE -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_hard_quota_limit=20); CREATE CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; CREATE @@ -64,7 +64,7 @@ DROP RESOURCE GROUP rg_concurrency_test; ERROR: resource group "rg_concurrency_test" does not exist -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_hard_quota_limit=20); CREATE CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; CREATE @@ -123,7 +123,7 @@ DROP RESOURCE GROUP rg_concurrency_test; ERROR: resource group "rg_concurrency_test" does not exist -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=2, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=2, cpu_hard_quota_limit=20); CREATE CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; CREATE @@ -184,7 +184,7 @@ DROP RESOURCE GROUP rg_concurrency_test; ERROR: resource group "rg_concurrency_test" does not exist -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=2, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=2, cpu_hard_quota_limit=20); CREATE CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; CREATE @@ -251,7 +251,7 @@ DROP RESOURCE GROUP rg_concurrency_test; ERROR: resource group "rg_concurrency_test" does not exist -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_hard_quota_limit=20); CREATE CREATE ROLE role_concurrency_test RESOURCE GROUP 
rg_concurrency_test; CREATE diff --git a/src/test/isolation2/expected/resgroup/resgroup_concurrency.out b/src/test/isolation2/expected/resgroup/resgroup_concurrency.out index 7d30d7633ce..d691c034e1b 100644 --- a/src/test/isolation2/expected/resgroup/resgroup_concurrency.out +++ b/src/test/isolation2/expected/resgroup/resgroup_concurrency.out @@ -6,7 +6,7 @@ DROP DROP RESOURCE GROUP rg_concurrency_test; ERROR: resource group "rg_concurrency_test" does not exist -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=2, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=2, cpu_hard_quota_limit=20); CREATE CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; CREATE @@ -18,6 +18,7 @@ SELECT r.rsgname, num_running, num_queueing, num_queued, num_executed FROM gp_to ---------------------+-------------+--------------+------------+-------------- rg_concurrency_test | 0 | 0 | 0 | 0 (1 row) + 2:SET ROLE role_concurrency_test; SET 2:BEGIN; @@ -36,6 +37,7 @@ SELECT r.rsgname, num_running, num_queueing, num_queued, num_executed FROM gp_to ---------------------+-------------+--------------+------------+-------------- rg_concurrency_test | 2 | 1 | 1 | 2 (1 row) + SELECT wait_event from pg_stat_activity where query = 'BEGIN;' and state = 'active' and rsgname = 'rg_concurrency_test' and wait_event_type='ResourceGroup'; wait_event --------------------- @@ -64,14 +66,15 @@ DROP -- test2: test alter concurrency -- Create a resource group with concurrency=2. Prepare 2 running transactions and 1 queueing transactions. --- Alter concurrency 2->3, the queueing transaction will be woken up, the 'value' of pg_resgroupcapability will be set to 3. +-- Alter concurrency 2->3, the queueing transaction will be woken up, the 'value' of pg_resgroupcapability +-- will be set to 3. 
DROP ROLE IF EXISTS role_concurrency_test; DROP -- start_ignore DROP RESOURCE GROUP rg_concurrency_test; ERROR: resource group "rg_concurrency_test" does not exist -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=2, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=2, cpu_hard_quota_limit=20); CREATE CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; CREATE @@ -91,6 +94,7 @@ SELECT r.rsgname, num_running, num_queueing, num_queued, num_executed FROM gp_to ---------------------+-------------+--------------+------------+-------------- rg_concurrency_test | 2 | 1 | 1 | 2 (1 row) + SELECT concurrency FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_concurrency_test'; concurrency ------------- @@ -132,7 +136,7 @@ DROP DROP RESOURCE GROUP rg_concurrency_test; ERROR: resource group "rg_concurrency_test" does not exist -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=3, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=3, cpu_hard_quota_limit=20); CREATE CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; CREATE @@ -233,7 +237,7 @@ DROP DROP RESOURCE GROUP rg_concurrency_test; ERROR: resource group "rg_concurrency_test" does not exist -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=2, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=2, cpu_hard_quota_limit=20); CREATE CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; CREATE @@ -266,7 +270,7 @@ DROP RESOURCE GROUP rg_concurrency_test; ERROR: resource group "rg_concurrency_test" does not exist -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_hard_quota_limit=20); CREATE CREATE ROLE role_concurrency_test RESOURCE 
GROUP rg_concurrency_test; CREATE @@ -307,7 +311,7 @@ DROP RESOURCE GROUP rg_concurrency_test; ERROR: resource group "rg_concurrency_test" does not exist -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_hard_quota_limit=20); CREATE CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; CREATE @@ -336,7 +340,7 @@ DROP RESOURCE GROUP rg_concurrency_test; ERROR: resource group "rg_concurrency_test" does not exist -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_hard_quota_limit=20); CREATE CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; CREATE @@ -372,7 +376,7 @@ DROP RESOURCE GROUP rg_concurrency_test; ERROR: resource group "rg_concurrency_test" does not exist -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=0, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=0, cpu_hard_quota_limit=20); CREATE CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; CREATE @@ -396,7 +400,7 @@ DROP -- Test cursors, pl/* functions only take one slot. 
-- -- set concurrency to 1 -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_hard_quota_limit=20); CREATE CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; CREATE diff --git a/src/test/isolation2/expected/resgroup/resgroup_cpuset_empty_default.out b/src/test/isolation2/expected/resgroup/resgroup_cpuset_empty_default.out index 160f6cd7db3..c84e98b5779 100644 --- a/src/test/isolation2/expected/resgroup/resgroup_cpuset_empty_default.out +++ b/src/test/isolation2/expected/resgroup/resgroup_cpuset_empty_default.out @@ -18,7 +18,7 @@ DROP RESOURCE GROUP rg1_cpuset_test; -- Create a resource group with all the cpu cores. -- The isolation2 test framework does not support \set so we have to plan with -- some tricks. -! psql -d isolation2resgrouptest -Ac "CREATE RESOURCE GROUP rg1_cpuset_test WITH (memory_limit=10, cpuset='0-$(($(nproc)-1))')"; +! psql -d isolation2resgrouptest -Ac "CREATE RESOURCE GROUP rg1_cpuset_test WITH (cpuset='0-$(($(nproc)-1))')"; CREATE RESOURCE GROUP diff --git a/src/test/isolation2/expected/resgroup/resgroup_dumpinfo.out b/src/test/isolation2/expected/resgroup/resgroup_dumpinfo.out index 4f4e18f2e31..833cbe951ae 100644 --- a/src/test/isolation2/expected/resgroup/resgroup_dumpinfo.out +++ b/src/test/isolation2/expected/resgroup/resgroup_dumpinfo.out @@ -10,8 +10,8 @@ CREATE -- end_ignore CREATE FUNCTION dump_test_check() RETURNS bool as $$ import json import pg -def validate(json_obj, segnum): array = json_obj.get("info") #validate segnum if len(array) != segnum: return False qd_info = [j for j in array if j["segid"] == -1][0] #validate keys keys = ["segid", "segmentsOnMaster", "loaded", "totalChunks", "freeChunks", "chunkSizeInBits", "groups"] for key in keys: if key not in qd_info: return False -groups = [g for g in qd_info["groups"] if g["group_id"] > 6441] #validate user created group if len(groups) != 1: 
return False group = groups[0] #validate group keys keys = ["group_id", "nRunning", "locked_for_drop", "memExpected", "memQuotaGranted", "memSharedGranted", "memQuotaUsed", "memUsage", "memSharedUsage"] for key in keys: if key not in group: return False +def validate(json_obj, segnum): array = json_obj.get("info") #validate segnum if len(array) != segnum: return False qd_info = [j for j in array if j["segid"] == -1][0] #validate keys keys = ["segid", "segmentsOnMaster", "loaded", "groups"] for key in keys: if key not in qd_info: return False +groups = [g for g in qd_info["groups"] if g["group_id"] > 6441] #validate user created group if len(groups) != 1: return False group = groups[0] #validate group keys keys = ["group_id", "nRunning", "locked_for_drop"] for key in keys: if key not in group: return False #validate waitqueue wait_queue = group["wait_queue"] if wait_queue["wait_queue_size"] != 1: return False #validate nrunning nrunning = group["nRunning"] if nrunning != 2: return False return True conn = pg.connect(dbname="postgres") @@ -21,7 +21,7 @@ return validate(json_obj, n) $$ LANGUAGE plpython3u; CREATE -CREATE RESOURCE GROUP rg_dumpinfo_test WITH (concurrency=2, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_dumpinfo_test WITH (concurrency=2, cpu_hard_quota_limit=20); CREATE CREATE ROLE role_dumpinfo_test RESOURCE GROUP rg_dumpinfo_test; CREATE diff --git a/src/test/isolation2/expected/resgroup/resgroup_functions.out b/src/test/isolation2/expected/resgroup/resgroup_functions.out index 9e0c1e8a894..b44b37c6004 100644 --- a/src/test/isolation2/expected/resgroup/resgroup_functions.out +++ b/src/test/isolation2/expected/resgroup/resgroup_functions.out @@ -1,5 +1,5 @@ -- start_ignore -SELECT s.groupid, s.num_running, s.num_queueing, s.num_queued, s.num_executed FROM pg_resgroup_get_status(NULL::oid) s(groupid, num_running, num_queueing, num_queued, num_executed, total_queue_duration, cpu_usage, memory_usage); +SELECT s.groupid, s.num_running, 
s.num_queueing, s.num_queued, s.num_executed FROM pg_resgroup_get_status(NULL::oid) s(groupid, num_running, num_queueing, num_queued, num_executed, total_queue_duration, cpu_usage); groupid | num_running | num_queueing | num_queued | num_executed ---------+-------------+--------------+------------+-------------- (0 rows) @@ -7,7 +7,7 @@ SELECT s.groupid, s.num_running, s.num_queueing, s.num_queued, s.num_executed FR CREATE TEMP TABLE resgroup_function_test(LIKE gp_toolkit.gp_resgroup_status); CREATE -INSERT INTO resgroup_function_test(groupid, num_running, num_queueing, num_queued, num_executed) SELECT s.groupid, s.num_running, s.num_queueing, s.num_queued, s.num_executed FROM pg_resgroup_get_status(NULL::oid) s(groupid, num_running, num_queueing, num_queued, num_executed, total_queue_duration, cpu_usage, memory_usage) LIMIT 1; +INSERT INTO resgroup_function_test(groupid, num_running, num_queueing, num_queued, num_executed) SELECT s.groupid, s.num_running, s.num_queueing, s.num_queued, s.num_executed FROM pg_resgroup_get_status(NULL::oid) s(groupid, num_running, num_queueing, num_queued, num_executed, total_queue_duration, cpu_usage) LIMIT 1; INSERT 1 SELECT count(num_executed)>0 FROM resgroup_function_test WHERE num_executed IS NOT NULL; diff --git a/src/test/isolation2/expected/resgroup/resgroup_large_group_id.out b/src/test/isolation2/expected/resgroup/resgroup_large_group_id.out index 558cfad58a3..1288fba6781 100644 --- a/src/test/isolation2/expected/resgroup/resgroup_large_group_id.out +++ b/src/test/isolation2/expected/resgroup/resgroup_large_group_id.out @@ -5,7 +5,7 @@ select gp_inject_fault('bump_oid', 'skip', dbid) from gp_segment_configuration w Success: (1 row) -create resource group rg_large_oid with (cpu_rate_limit=20, memory_limit=10); +create resource group rg_large_oid with (cpu_hard_quota_limit=20); CREATE select gp_inject_fault('bump_oid', 'reset', dbid) from gp_segment_configuration where role = 'p' and content = -1; diff --git 
a/src/test/isolation2/expected/resgroup/resgroup_memory_hashagg_spill.out b/src/test/isolation2/expected/resgroup/resgroup_memory_hashagg_spill.out deleted file mode 100644 index d3422c7a85f..00000000000 --- a/src/test/isolation2/expected/resgroup/resgroup_memory_hashagg_spill.out +++ /dev/null @@ -1,161 +0,0 @@ --- start_matchsubs --- m/INSERT \d+/ --- s/INSERT \d+/INSERT/ --- end_matchsubs -create schema hashagg_spill; -CREATE -set search_path to hashagg_spill; -SET - --- start_ignore -create language plpython3u; -CREATE --- end_ignore - --- set workfile is created to true if all segment did it. -create or replace function hashagg_spill.is_workfile_created(explain_query text) returns setof int as $$ import re query = "select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0;" rv = plpy.execute(query) nsegments = int(rv[0]['nsegments']) rv = plpy.execute(explain_query) search_text = 'Work_mem used' result = [] for i in range(len(rv)): cur_line = rv[i]['QUERY PLAN'] if search_text.lower() in cur_line.lower(): p = re.compile('.+\((seg[\d]+).+ Workfile: \(([\d+]) spilling\)') m = p.match(cur_line) if not m: continue workfile_created = int(m.group(2)) cur_row = int(workfile_created == nsegments) result.append(cur_row) return result $$ language plpython3u; -CREATE - -create table testhagg (i1 int, i2 int, i3 int, i4 int); -CREATE -insert into testhagg select i,i,i,i from (select generate_series(1, nsegments * 17000) as i from (select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0) foo) bar; -INSERT 51000 - - --- start_ignore -DROP ROLE IF EXISTS role1_memory_test; -DROP -DROP RESOURCE GROUP rg1_memory_test; -ERROR: resource group "rg1_memory_test" does not exist --- end_ignore -CREATE ROLE role1_memory_test SUPERUSER; -CREATE -CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_rate_limit=10, memory_limit=30, memory_shared_quota=0, memory_spill_ratio=10); -CREATE -SET ROLE TO 
role1_memory_test; -SET - -0: ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SPILL_RATIO 2; -ALTER -0: ALTER ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; -ALTER -set gp_resgroup_print_operator_memory_limits=on; -SET - --- the number of rows returned by the query varies depending on the number of segments, so --- only print the first 10 -select * from (select max(i1) from testhagg group by i2) foo order by 1 limit 10; - max ------ - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 - 10 -(10 rows) -select * from hashagg_spill.is_workfile_created('explain analyze select max(i1) from testhagg group by i2;'); - is_workfile_created ---------------------- - 1 -(1 row) -select * from hashagg_spill.is_workfile_created('explain analyze select max(i1) from testhagg group by i2 limit 45000;'); - is_workfile_created ---------------------- - 1 -(1 row) - - --- Test HashAgg with increasing amount of overflows - -reset all; -RESET - --- Returns the number of overflows from EXPLAIN ANALYZE output -create or replace function hashagg_spill.num_hashagg_overflows(explain_query text) returns setof int as $$ import re query = "select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0;" rv = plpy.execute(query) rv = plpy.execute(explain_query) result = [] for i in range(len(rv)): cur_line = rv[i]['QUERY PLAN'] p = re.compile('.+\((seg[\d]+).+ ([\d+]) overflows;') m = p.match(cur_line) if m: overflows = int(m.group(2)) result.append(overflows) return result $$ language plpython3u; -CREATE - --- Test agg spilling scenarios -drop table if exists aggspill; -DROP -create table aggspill (i int, j int, t text) distributed by (i); -CREATE -insert into aggspill select i, i*2, i::text from generate_series(1, 10000) i; -INSERT 10000 -insert into aggspill select i, i*2, i::text from generate_series(1, 100000) i; -INSERT 100000 -insert into aggspill select i, i*2, i::text from generate_series(1, 1000000) i; -INSERT 1000000 - --- No spill with large statement memory -0: 
ALTER ROLE role1_memory_test RESOURCE GROUP none; -ALTER -0: DROP RESOURCE GROUP rg1_memory_test; -DROP -0: CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=1, cpu_rate_limit=10, memory_limit=60, memory_shared_quota=0, memory_spill_ratio=30); -CREATE -0: ALTER ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; -ALTER -select count(*) from (select i, count(*) from aggspill group by i,j having count(*) = 1) g; - count --------- - 900000 -(1 row) - --- Reduce the statement memory to induce spilling -0: ALTER ROLE role1_memory_test RESOURCE GROUP none; -ALTER -0: DROP RESOURCE GROUP rg1_memory_test; -DROP -0: CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_rate_limit=10, memory_limit=30, memory_shared_quota=0, memory_spill_ratio=10); -CREATE -0: ALTER ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; -ALTER -select overflows >= 1 from hashagg_spill.num_hashagg_overflows('explain analyze select count(*) from (select i, count(*) from aggspill group by i,j having count(*) = 2) g') overflows; - ?column? ----------- - t -(1 row) -select count(*) from (select i, count(*) from aggspill group by i,j having count(*) = 2) g; - count -------- - 90000 -(1 row) - --- Reduce the statement memory, nbatches and entrysize even further to cause multiple overflows -set gp_hashagg_default_nbatches = 4; -SET -0: ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SPILL_RATIO 5; -ALTER -0: ALTER ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; -ALTER - -select overflows > 1 from hashagg_spill.num_hashagg_overflows('explain analyze select count(*) from (select i, count(*) from aggspill group by i,j,t having count(*) = 3) g') overflows; - ?column? 
----------- - t -(1 row) - -select count(*) from (select i, count(*) from aggspill group by i,j,t having count(*) = 3) g; - count -------- - 10000 -(1 row) - -drop schema hashagg_spill cascade; -DROP -drop table aggspill; -DROP - --- start_ignore -RESET ROLE; -RESET -DROP ROLE IF EXISTS role1_memory_test; -DROP -DROP RESOURCE GROUP rg1_memory_test; -DROP --- end_ignore diff --git a/src/test/isolation2/expected/resgroup/resgroup_memory_hashjoin_spill.out b/src/test/isolation2/expected/resgroup/resgroup_memory_hashjoin_spill.out deleted file mode 100644 index 4fea4e63a49..00000000000 --- a/src/test/isolation2/expected/resgroup/resgroup_memory_hashjoin_spill.out +++ /dev/null @@ -1,105 +0,0 @@ --- start_matchsubs --- m/INSERT \d+/ --- s/INSERT \d+/INSERT/ --- end_matchsubs -create schema hashjoin_spill; -CREATE -set search_path to hashjoin_spill; -SET - --- start_ignore -create language plpython3u; -ERROR: language "plpython3u" already exists --- end_ignore - --- set workfile is created to true if all segment did it. 
-create or replace function hashjoin_spill.is_workfile_created(explain_query text) returns setof int as $$ import re query = "select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0;" rv = plpy.execute(query) nsegments = int(rv[0]['nsegments']) rv = plpy.execute(explain_query) search_text = 'Work_mem used' result = [] for i in range(len(rv)): cur_line = rv[i]['QUERY PLAN'] if search_text.lower() in cur_line.lower(): p = re.compile('.+\((seg[\d]+).+ Workfile: \(([\d+]) spilling\)') m = p.match(cur_line) workfile_created = int(m.group(2)) cur_row = int(workfile_created == nsegments) result.append(cur_row) return result $$ language plpython3u; -CREATE - --- start_ignore -DROP ROLE IF EXISTS role1_memory_test; -DROP -DROP RESOURCE GROUP rg1_memory_test; -ERROR: resource group "rg1_memory_test" does not exist --- end_ignore -CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_rate_limit=10, memory_limit=30, memory_shared_quota=0, memory_spill_ratio=1); -CREATE -CREATE ROLE role1_memory_test SUPERUSER RESOURCE GROUP rg1_memory_test; -CREATE -SET ROLE TO role1_memory_test; -SET - -CREATE TABLE test_hj_spill (i1 int, i2 int, i3 int, i4 int, i5 int, i6 int, i7 int, i8 int); -CREATE -insert into test_hj_spill SELECT i,i,i%1000,i,i,i,i,i from (select generate_series(1, nsegments * 15000) as i from (select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0) foo) bar; -INSERT 45000 -set gp_resgroup_print_operator_memory_limits=on; -SET - -set gp_workfile_type_hashjoin=buffile; -SET -select avg(i3) from (SELECT t1.* FROM test_hj_spill AS t1 RIGHT JOIN test_hj_spill AS t2 ON t1.i1=t2.i2) foo; - avg -------- - 499.5 -(1 row) -select * from hashjoin_spill.is_workfile_created('explain analyze SELECT t1.* FROM test_hj_spill AS t1 RIGHT JOIN test_hj_spill AS t2 ON t1.i1=t2.i2;'); - is_workfile_created ---------------------- - 1 -(1 row) -select * from hashjoin_spill.is_workfile_created('explain analyze 
SELECT t1.* FROM test_hj_spill AS t1 RIGHT JOIN test_hj_spill AS t2 ON t1.i1=t2.i2 LIMIT 15000;'); - is_workfile_created ---------------------- - 1 -(1 row) - -set gp_workfile_type_hashjoin=bfz; -SET -set gp_workfile_compress_algorithm=zlib; -SET -select avg(i3) from (SELECT t1.* FROM test_hj_spill AS t1 RIGHT JOIN test_hj_spill AS t2 ON t1.i1=t2.i2) foo; - avg -------- - 499.5 -(1 row) -select * from hashjoin_spill.is_workfile_created('explain analyze SELECT t1.* FROM test_hj_spill AS t1 RIGHT JOIN test_hj_spill AS t2 ON t1.i1=t2.i2'); - is_workfile_created ---------------------- - 1 -(1 row) -select * from hashjoin_spill.is_workfile_created('explain analyze SELECT t1.* FROM test_hj_spill AS t1 RIGHT JOIN test_hj_spill AS t2 ON t1.i1=t2.i2 LIMIT 15000;'); - is_workfile_created ---------------------- - 1 -(1 row) - -set gp_workfile_compress_algorithm=NONE; -SET -select avg(i3) from (SELECT t1.* FROM test_hj_spill AS t1 RIGHT JOIN test_hj_spill AS t2 ON t1.i1=t2.i2) foo; - avg -------- - 499.5 -(1 row) -select * from hashjoin_spill.is_workfile_created('explain analyze SELECT t1.* FROM test_hj_spill AS t1 RIGHT JOIN test_hj_spill AS t2 ON t1.i1=t2.i2'); - is_workfile_created ---------------------- - 1 -(1 row) -select * from hashjoin_spill.is_workfile_created('explain analyze SELECT t1.* FROM test_hj_spill AS t1 RIGHT JOIN test_hj_spill AS t2 ON t1.i1=t2.i2 LIMIT 15000;'); - is_workfile_created ---------------------- - 1 -(1 row) - -drop schema hashjoin_spill cascade; -DROP - --- start_ignore -RESET ROLE; -RESET -DROP ROLE IF EXISTS role1_memory_test; -DROP -DROP RESOURCE GROUP rg1_memory_test; -DROP --- end_ignore diff --git a/src/test/isolation2/expected/resgroup/resgroup_memory_materialize_spill.out b/src/test/isolation2/expected/resgroup/resgroup_memory_materialize_spill.out deleted file mode 100644 index 8df1d3b0744..00000000000 --- a/src/test/isolation2/expected/resgroup/resgroup_memory_materialize_spill.out +++ /dev/null @@ -1,122 +0,0 @@ --- start_matchsubs 
--- m/INSERT \d+/ --- s/INSERT \d+/INSERT/ --- end_matchsubs -create schema materialize_spill; -CREATE -set search_path to materialize_spill; -SET - --- start_ignore -create language plpython3u; -CREATE --- end_ignore - --- Helper function to verify that a plan spilled to disk. For each node --- in the plan that used Workfiles (Materialize or Sort nodes, currently), --- return the number of segments where the node spilled to disk. -create or replace function num_workfiles_created(explain_query text) returns setof int as $$ import re rv = plpy.execute(explain_query) search_text = 'Work_mem used' result = [] for i in range(len(rv)): cur_line = rv[i]['QUERY PLAN'] if search_text.lower() in cur_line.lower(): p = re.compile('.+\((seg[\d]+).+ Workfile: \(([\d+]) spilling\)') m = p.match(cur_line) workfile_created = int(m.group(2)) result.append(workfile_created) return result $$ language plpython3u; -CREATE - --- Run a query that contains a Materialize node that spills to disk. --- --- The expected plan is something like this: --- --- Gather Motion 3:1 --- -> Nested Loop Left Join --- Join Filter: t1.i1 = t2.i2 --- -> Seq Scan on test_mat_small t1 --- -> Materialize --- -> Redistribute Motion 3:3 --- Hash Key: t2.i2 --- -> Seq Scan on test_mat_large t2 --- --- The planner will put a Materialize node on the inner side, to shield --- the Motion node from rewinding. Because the larger table doesn't fit --- in memory, the Materialize will spill to disk. --- -CREATE TABLE test_mat_small (i1 int); -CREATE -INSERT INTO test_mat_small SELECT i from generate_series(101, 105) i; -INSERT 5 - --- Scale the larger table's size with the number of segments, so that there is enough --- data on every segment to cause spilling. 
-CREATE TABLE test_mat_large (i1 int, i2 int, i3 int, i4 int, i5 int, i6 int, i7 int, i8 int); -CREATE -INSERT INTO test_mat_large SELECT i,i,i,i,i,i,i,i from (select generate_series(1, nsegments * 50000) as i from (select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0) foo) bar; -INSERT 150000 - --- start_ignore -DROP ROLE IF EXISTS role1_memory_test; -DROP -DROP RESOURCE GROUP rg1_memory_test; -ERROR: resource group "rg1_memory_test" does not exist --- end_ignore -CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_rate_limit=10, memory_limit=30, memory_shared_quota=0, memory_spill_ratio=1); -CREATE -CREATE ROLE role1_memory_test SUPERUSER RESOURCE GROUP rg1_memory_test; -CREATE -SET ROLE TO role1_memory_test; -SET - -set gp_resgroup_print_operator_memory_limits=on; -SET -set enable_hashjoin = false; -SET -set enable_nestloop = true; -SET --- ORCA doesn't honor enable_nestloop/enable_hashjoin, so this won't produce --- the kind of plan we're looking for. -set optimizer=off; -SET - --- This is the actual test query. -select * FROM test_mat_small as t1 left outer join test_mat_large AS t2 on t1.i1=t2.i2; - i1 | i1 | i2 | i3 | i4 | i5 | i6 | i7 | i8 ------+-----+-----+-----+-----+-----+-----+-----+----- - 101 | 101 | 101 | 101 | 101 | 101 | 101 | 101 | 101 - 102 | 102 | 102 | 102 | 102 | 102 | 102 | 102 | 102 - 103 | 103 | 103 | 103 | 103 | 103 | 103 | 103 | 103 - 104 | 104 | 104 | 104 | 104 | 104 | 104 | 104 | 104 - 105 | 105 | 105 | 105 | 105 | 105 | 105 | 105 | 105 -(5 rows) - --- Check that the Materialize node spilled to disk, to make sure we're testing spilling --- as intended. The inner side of the join with the Materialize will not get executed on --- segments that have no data for the outer side. Therefore, we expect the Materialize --- node to only be executed, and spilled, on as many segments as there nodes that hold --- data from test_mat_small. 
-select n - (select count (distinct gp_segment_id) from test_mat_small) as difference from num_workfiles_created($$ explain analyze select * FROM test_mat_small as t1 left outer join test_mat_large AS t2 on t1.i1=t2.i2 $$) as n; - difference ------------- - 0 -(1 row) - --- Repeat, with a LIMIT. This causes the underlying scan to finish earlier. -select * FROM test_mat_small as t1 left outer join test_mat_large AS t2 on t1.i1=t2.i2 limit 10; - i1 | i1 | i2 | i3 | i4 | i5 | i6 | i7 | i8 ------+-----+-----+-----+-----+-----+-----+-----+----- - 101 | 101 | 101 | 101 | 101 | 101 | 101 | 101 | 101 - 102 | 102 | 102 | 102 | 102 | 102 | 102 | 102 | 102 - 103 | 103 | 103 | 103 | 103 | 103 | 103 | 103 | 103 - 104 | 104 | 104 | 104 | 104 | 104 | 104 | 104 | 104 - 105 | 105 | 105 | 105 | 105 | 105 | 105 | 105 | 105 -(5 rows) -select n - (select count (distinct gp_segment_id) from test_mat_small) as difference from num_workfiles_created($$ explain analyze select * FROM test_mat_small as t1 left outer join test_mat_large AS t2 on t1.i1=t2.i2 limit 10 $$) as n; - difference ------------- - 0 -(1 row) - -drop schema materialize_spill cascade; -DROP - --- start_ignore -RESET ROLE; -RESET -DROP ROLE IF EXISTS role1_memory_test; -DROP -DROP RESOURCE GROUP rg1_memory_test; -DROP --- end_ignore diff --git a/src/test/isolation2/expected/resgroup/resgroup_memory_sisc_mat_sort.out b/src/test/isolation2/expected/resgroup/resgroup_memory_sisc_mat_sort.out deleted file mode 100644 index b40d944f5ce..00000000000 --- a/src/test/isolation2/expected/resgroup/resgroup_memory_sisc_mat_sort.out +++ /dev/null @@ -1,102 +0,0 @@ --- start_matchsubs --- m/INSERT \d+/ --- s/INSERT \d+/INSERT/ --- end_matchsubs -create schema sisc_mat_sort; -CREATE -set search_path to sisc_mat_sort; -SET - --- start_ignore -create language plpython3u; -ERROR: language "plpython3u" already exists --- end_ignore - --- set workfile is created to true if all segment did it. 
-create or replace function sisc_mat_sort.is_workfile_created(explain_query text) returns setof int as $$ import re query = "select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0;" rv = plpy.execute(query) nsegments = int(rv[0]['nsegments']) rv = plpy.execute(explain_query) search_text = 'Work_mem used' result = [] for i in range(len(rv)): cur_line = rv[i]['QUERY PLAN'] if search_text.lower() in cur_line.lower(): p = re.compile('.+\((seg[\d]+).+ Workfile: \(([\d+]) spilling\)') m = p.match(cur_line) workfile_created = int(m.group(2)) cur_row = int(workfile_created == nsegments) result.append(cur_row) return result $$ language plpython3u; -CREATE - -create table testsiscm (i1 int, i2 int, i3 int, i4 int); -CREATE -insert into testsiscm select i, i % 1000, i % 100000, i % 75 from (select generate_series(1, nsegments * 150000) as i from (select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0) foo) bar; -INSERT 300000 - --- start_ignore -DROP ROLE IF EXISTS role1_memory_test; -DROP -DROP RESOURCE GROUP rg1_memory_test; -ERROR: resource group "rg1_memory_test" does not exist --- end_ignore -CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_rate_limit=10, memory_limit=30, memory_shared_quota=0, memory_spill_ratio=3); -CREATE -CREATE ROLE role1_memory_test SUPERUSER RESOURCE GROUP rg1_memory_test; -CREATE -SET ROLE TO role1_memory_test; -SET - - -set gp_resgroup_print_operator_memory_limits=on; -SET -set gp_cte_sharing=on; -SET -set gp_enable_mk_sort=on; -SET --- The expected output is very sensitive to the kind of plan this produces. 
--- We're testing the executor, not the planner, so force ORCA off, to get --- the particular plan -set optimizer=off; -SET - -select count(*) from (with ctesisc as (select count(i1) as c1, i2 as c2, i3 as c3 from testsiscm group by i2, i3) select * from ctesisc as t1, ctesisc as t2 where t1.c1 = t2.c1 and t1.c3 = t2.c3) foo; - count --------- - 100000 -(1 row) -select * from sisc_mat_sort.is_workfile_created('explain analyze with ctesisc as (select count(i1) as c1, i2 as c2, i3 as c3 from testsiscm group by i2, i3) select * from ctesisc as t1, ctesisc as t2 where t1.c1 = t2.c1 and t1.c3 = t2.c3;'); - is_workfile_created ---------------------- - 1 - 0 - 1 -(3 rows) -select * from sisc_mat_sort.is_workfile_created('explain analyze with ctesisc as (select count(i1) as c1, i2 as c2, i3 as c3 from testsiscm group by i2, i3) select * from ctesisc as t1, ctesisc as t2 where t1.c1 = t2.c1 and t1.c3 = t2.c3 limit 50000;'); - is_workfile_created ---------------------- - 1 - 0 - 1 -(3 rows) - -set gp_enable_mk_sort=off; -SET -select count(*) from (with ctesisc as (select count(i1) as c1, i2 as c2, i3 as c3 from testsiscm group by i2, i3) select * from ctesisc as t1, ctesisc as t2 where t1.c1 = t2.c1 and t1.c3 = t2.c3) foo; - count --------- - 100000 -(1 row) -select * from sisc_mat_sort.is_workfile_created('explain analyze with ctesisc as (select count(i1) as c1, i2 as c2, i3 as c3 from testsiscm group by i2, i3) select * from ctesisc as t1, ctesisc as t2 where t1.c1 = t2.c1 and t1.c3 = t2.c3;'); - is_workfile_created ---------------------- - 1 - 0 - 1 -(3 rows) -select * from sisc_mat_sort.is_workfile_created('explain analyze with ctesisc as (select count(i1) as c1, i2 as c2, i3 as c3 from testsiscm group by i2, i3) select * from ctesisc as t1, ctesisc as t2 where t1.c1 = t2.c1 and t1.c3 = t2.c3 limit 50000;'); - is_workfile_created ---------------------- - 1 - 0 - 1 -(3 rows) - -drop schema sisc_mat_sort cascade; -DROP - --- start_ignore -RESET ROLE; -RESET -DROP ROLE IF 
EXISTS role1_memory_test; -DROP -DROP RESOURCE GROUP rg1_memory_test; -DROP --- end_ignore diff --git a/src/test/isolation2/expected/resgroup/resgroup_memory_sisc_sort_spill.out b/src/test/isolation2/expected/resgroup/resgroup_memory_sisc_sort_spill.out deleted file mode 100644 index 09e2bf31ee4..00000000000 --- a/src/test/isolation2/expected/resgroup/resgroup_memory_sisc_sort_spill.out +++ /dev/null @@ -1,100 +0,0 @@ --- start_matchsubs --- m/INSERT \d+/ --- s/INSERT \d+/INSERT/ --- end_matchsubs -create schema sisc_sort_spill; -CREATE -set search_path to sisc_sort_spill; -SET - --- start_ignore -create language plpython3u; -ERROR: language "plpython3u" already exists --- end_ignore - --- set workfile is created to true if all segment did it. -create or replace function sisc_sort_spill.is_workfile_created(explain_query text) returns setof int as $$ import re query = "select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0;" rv = plpy.execute(query) nsegments = int(rv[0]['nsegments']) rv = plpy.execute(explain_query) search_text = 'Work_mem used' result = [] for i in range(len(rv)): cur_line = rv[i]['QUERY PLAN'] if search_text.lower() in cur_line.lower(): p = re.compile('.+\((seg[\d]+).+ Workfile: \(([\d+]) spilling\)') m = p.match(cur_line) workfile_created = int(m.group(2)) cur_row = int(workfile_created == nsegments) result.append(cur_row) return result $$ language plpython3u; -CREATE - -create table testsisc (i1 int, i2 int, i3 int, i4 int); -CREATE -insert into testsisc select i, i % 1000, i % 100000, i % 75 from (select generate_series(1, nsegments * 50000) as i from (select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0) foo) bar; -INSERT 100000 - --- start_ignore -DROP ROLE IF EXISTS role1_memory_test; -DROP -DROP RESOURCE GROUP rg1_memory_test; -ERROR: resource group "rg1_memory_test" does not exist --- end_ignore -CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, 
cpu_rate_limit=10, memory_limit=30, memory_shared_quota=0, memory_spill_ratio=2); -CREATE -CREATE ROLE role1_memory_test SUPERUSER RESOURCE GROUP rg1_memory_test; -CREATE -SET ROLE TO role1_memory_test; -SET - -set gp_resgroup_print_operator_memory_limits=on; -SET -set gp_cte_sharing=on; -SET --- ORCA optimizes away the ORDER BY in our test query, and therefore doesn't exercise --- a Sort that spills. -set optimizer=off; -SET - -set gp_enable_mk_sort=on; -SET -select avg(i3) from ( with ctesisc as (select * from testsisc order by i2) select t1.i3, t2.i2 from ctesisc as t1, ctesisc as t2 where t1.i1 = t2.i2 ) foo; - avg ------ - 500 -(1 row) - -select * from sisc_sort_spill.is_workfile_created('explain analyze with ctesisc as (select * from testsisc order by i2) select t1.i3, t2.i2 from ctesisc as t1, ctesisc as t2 where t1.i1 = t2.i2 ;'); - is_workfile_created ---------------------- - 1 - 1 -(2 rows) -select * from sisc_sort_spill.is_workfile_created('explain analyze with ctesisc as (select * from testsisc order by i2) select t1.i3, t2.i2 from ctesisc as t1, ctesisc as t2 where t1.i1 = t2.i2 limit 50000;'); - is_workfile_created ---------------------- - 1 - 1 -(2 rows) - - -set gp_enable_mk_sort=off; -SET -select avg(i3) from ( with ctesisc as (select * from testsisc order by i2) select t1.i3, t2.i2 from ctesisc as t1, ctesisc as t2 where t1.i1 = t2.i2 ) foo; - avg ------ - 500 -(1 row) - -select * from sisc_sort_spill.is_workfile_created('explain analyze with ctesisc as (select * from testsisc order by i2) select t1.i3, t2.i2 from ctesisc as t1, ctesisc as t2 where t1.i1 = t2.i2 ;'); - is_workfile_created ---------------------- - 1 - 1 -(2 rows) - -select * from sisc_sort_spill.is_workfile_created('explain analyze with ctesisc as (select * from testsisc order by i2) select t1.i3, t2.i2 from ctesisc as t1, ctesisc as t2 where t1.i1 = t2.i2 limit 50000;'); - is_workfile_created ---------------------- - 1 - 1 -(2 rows) - -drop schema sisc_sort_spill cascade; -DROP - 
--- start_ignore -RESET ROLE; -RESET -DROP ROLE IF EXISTS role1_memory_test; -DROP -DROP RESOURCE GROUP rg1_memory_test; -DROP --- end_ignore diff --git a/src/test/isolation2/expected/resgroup/resgroup_memory_sort_spill.out b/src/test/isolation2/expected/resgroup/resgroup_memory_sort_spill.out deleted file mode 100644 index 75d90836bac..00000000000 --- a/src/test/isolation2/expected/resgroup/resgroup_memory_sort_spill.out +++ /dev/null @@ -1,87 +0,0 @@ --- start_matchsubs --- m/INSERT \d+/ --- s/INSERT \d+/INSERT/ --- end_matchsubs -create schema sort_spill; -CREATE -set search_path to sort_spill; -SET - --- start_ignore -create language plpython3u; -ERROR: language "plpython3u" already exists --- end_ignore - --- set workfile is created to true if all segment did it. -create or replace function sort_spill.is_workfile_created(explain_query text) returns setof int as $$ import re query = "select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0;" rv = plpy.execute(query) nsegments = int(rv[0]['nsegments']) rv = plpy.execute(explain_query) search_text = 'Work_mem used' result = [] for i in range(len(rv)): cur_line = rv[i]['QUERY PLAN'] if search_text.lower() in cur_line.lower(): p = re.compile('.+\((seg[\d]+).+ Workfile: \(([\d+]) spilling\)') m = p.match(cur_line) workfile_created = int(m.group(2)) cur_row = int(workfile_created == nsegments) result.append(cur_row) return result $$ language plpython3u; -CREATE - - -create table testsort (i1 int, i2 int, i3 int, i4 int); -CREATE -insert into testsort select i, i % 1000, i % 100000, i % 75 from (select generate_series(1, nsegments * 50000) as i from (select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0) foo) bar; -INSERT 100000 - --- start_ignore -DROP ROLE IF EXISTS role1_memory_test; -DROP -DROP RESOURCE GROUP rg1_memory_test; -ERROR: resource group "rg1_memory_test" does not exist --- end_ignore -CREATE RESOURCE GROUP rg1_memory_test WITH 
(concurrency=2, cpu_rate_limit=10, memory_limit=30, memory_shared_quota=0, memory_spill_ratio=1); -CREATE -CREATE ROLE role1_memory_test SUPERUSER RESOURCE GROUP rg1_memory_test; -CREATE -SET ROLE TO role1_memory_test; -SET - -set gp_resgroup_print_operator_memory_limits=on; -SET - -set gp_enable_mk_sort=on; -SET -select avg(i2) from (select i1,i2 from testsort order by i2) foo; - avg -------- - 499.5 -(1 row) -select * from sort_spill.is_workfile_created('explain analyze select i1,i2 from testsort order by i2;'); - is_workfile_created ---------------------- - 1 -(1 row) -select * from sort_spill.is_workfile_created('explain analyze select i1,i2 from testsort order by i2 limit 50000;'); - is_workfile_created ---------------------- - 1 -(1 row) - -set gp_enable_mk_sort=off; -SET -select avg(i2) from (select i1,i2 from testsort order by i2) foo; - avg -------- - 499.5 -(1 row) -select * from sort_spill.is_workfile_created('explain analyze select i1,i2 from testsort order by i2;'); - is_workfile_created ---------------------- - 1 -(1 row) -select * from sort_spill.is_workfile_created('explain analyze select i1,i2 from testsort order by i2 limit 50000;'); - is_workfile_created ---------------------- - 1 -(1 row) - -drop schema sort_spill cascade; -DROP - --- start_ignore -RESET ROLE; -RESET -DROP ROLE IF EXISTS role1_memory_test; -DROP -DROP RESOURCE GROUP rg1_memory_test; -DROP --- end_ignore diff --git a/src/test/isolation2/expected/resgroup/resgroup_memory_spilltodisk.out b/src/test/isolation2/expected/resgroup/resgroup_memory_spilltodisk.out deleted file mode 100644 index eeaa44474a2..00000000000 --- a/src/test/isolation2/expected/resgroup/resgroup_memory_spilltodisk.out +++ /dev/null @@ -1,163 +0,0 @@ --- up the admin_group memory limits -ALTER RESOURCE GROUP admin_group SET memory_limit 30; -ALTER - --- Test Mark/Restore in Material Node -create table spilltest1 (a integer); -CREATE -create table spilltest2 (a integer); -CREATE -insert into spilltest1 select a 
from generate_series(1,400000) a; -INSERT 400000 -insert into spilltest2 select a from generate_series(1,400000) a; -INSERT 400000 - --- go back to the default admin_group limit -ALTER RESOURCE GROUP admin_group SET memory_limit 10; -ALTER - --- start_ignore -DROP ROLE IF EXISTS role1_memory_test; -DROP -DROP RESOURCE GROUP rg1_memory_test; -ERROR: resource group "rg1_memory_test" does not exist --- end_ignore -CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_rate_limit=10, memory_limit=60, memory_shared_quota=0, memory_spill_ratio=10); -CREATE -CREATE ROLE role1_memory_test SUPERUSER RESOURCE GROUP rg1_memory_test; -CREATE -SET ROLE TO role1_memory_test; -SET - -set enable_hashagg=off; -SET -set enable_mergejoin=on; -SET -set enable_hashjoin=off; -SET -set enable_nestloop=off; -SET - -create temporary table spilltestresult1 as select t1.a as t1a, t2.a as t2a from (select a from spilltest1 group by a) t1, (select a from spilltest2 group by a) t2 where t1.a = t2.a; -CREATE 400000 - --- check that the result looks sane -select count(*), sum(t1a), sum(t2a), sum(t1a - t2a) from spilltestresult1; - count | sum | sum | sum ---------+-------------+-------------+----- - 400000 | 80000200000 | 80000200000 | 0 -(1 row) - --- Test Hash Aggregation when the work mem is too small for the hash table -create table spilltest (a integer, b integer); -CREATE -insert into spilltest select a, a%25 from generate_series(1,8000) a; -INSERT 8000 -analyze; -ANALYZE -set enable_hashagg=on; -SET -set enable_groupagg=off; -SET - -select b,count(*) from spilltest group by b order by b; - b | count -----+------- - 0 | 320 - 1 | 320 - 2 | 320 - 3 | 320 - 4 | 320 - 5 | 320 - 6 | 320 - 7 | 320 - 8 | 320 - 9 | 320 - 10 | 320 - 11 | 320 - 12 | 320 - 13 | 320 - 14 | 320 - 15 | 320 - 16 | 320 - 17 | 320 - 18 | 320 - 19 | 320 - 20 | 320 - 21 | 320 - 22 | 320 - 23 | 320 - 24 | 320 -(25 rows) - -select b,count(*) from spilltest group by b order by b; - b | count -----+------- - 0 | 320 - 1 
| 320 - 2 | 320 - 3 | 320 - 4 | 320 - 5 | 320 - 6 | 320 - 7 | 320 - 8 | 320 - 9 | 320 - 10 | 320 - 11 | 320 - 12 | 320 - 13 | 320 - 14 | 320 - 15 | 320 - 16 | 320 - 17 | 320 - 18 | 320 - 19 | 320 - 20 | 320 - 21 | 320 - 22 | 320 - 23 | 320 - 24 | 320 -(25 rows) --- Test Hash Join when the work mem is too small for the hash table -drop table if exists spilltest; -DROP -create table spilltest (a integer, b integer); -CREATE -insert into spilltest select a, a%25 from generate_series(1,800000) a; -INSERT 800000 -analyze; -- We have to do an analyze to force a hash join set enable_mergejoin=off; -ANALYZE; -set enable_nestloop=off; -SET -set enable_hashjoin=on; -SET - -create temporary table spilltestresult2 as select t1.a as t1a, t1.b as t1b, t2.a as t2a, t2.b as t2b from spilltest t1, spilltest t2 where t1.a = t2.a; -CREATE 800000 --- check that the result looks sane -select count(*), sum(t1a), sum(t2a), sum(t2a), sum(t2b), sum(t1a * t1b) from spilltestresult2; - count | sum | sum | sum | sum | sum ---------+--------------+--------------+--------------+---------+--------------- - 800000 | 320000400000 | 320000400000 | 320000400000 | 9600000 | 3840036800000 -(1 row) - -drop table spilltest1; -DROP -drop table spilltest2; -DROP -drop table spilltest; -DROP -drop table spilltestresult1; -DROP -drop table spilltestresult2; -DROP - --- start_ignore -RESET ROLE; -RESET -DROP ROLE IF EXISTS role1_memory_test; -DROP -DROP RESOURCE GROUP rg1_memory_test; -DROP --- end_ignore diff --git a/src/test/isolation2/expected/resgroup/resgroup_name_convention.out b/src/test/isolation2/expected/resgroup/resgroup_name_convention.out index a29307bfe18..2c96c655bd9 100644 --- a/src/test/isolation2/expected/resgroup/resgroup_name_convention.out +++ b/src/test/isolation2/expected/resgroup/resgroup_name_convention.out @@ -28,7 +28,7 @@ CREATE -- -- by default resgroup names have the form of [_a-zA-Z][_a-zA-Z0-9]* -CREATE RESOURCE GROUP rgNameTest01 WITH (cpu_rate_limit=10, memory_limit=10); 
+CREATE RESOURCE GROUP rgNameTest01 WITH (cpu_hard_quota_limit=10); CREATE ALTER RESOURCE GROUP rgNameTest01 SET concurrency 2; ALTER @@ -39,7 +39,7 @@ SELECT * FROM rg_name_view; (1 row) DROP RESOURCE GROUP rgNameTest01; DROP -CREATE RESOURCE GROUP __rg_name_test_01__ WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP __rg_name_test_01__ WITH (cpu_hard_quota_limit=10); CREATE ALTER RESOURCE GROUP __rg_name_test_01__ SET concurrency 2; ALTER @@ -52,13 +52,13 @@ DROP RESOURCE GROUP __rg_name_test_01__; DROP -- min length is 1 character -CREATE RESOURCE GROUP Z WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP Z WITH (cpu_hard_quota_limit=10); CREATE DROP RESOURCE GROUP Z; DROP -- max length is 63 characters -CREATE RESOURCE GROUP max012345678901234567890123456789012345678901234567890123456789 WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP max012345678901234567890123456789012345678901234567890123456789 WITH (cpu_hard_quota_limit=10); CREATE ALTER RESOURCE GROUP max012345678901234567890123456789012345678901234567890123456789 SET concurrency 2; ALTER @@ -70,7 +70,7 @@ SELECT * FROM rg_name_view; DROP RESOURCE GROUP max012345678901234567890123456789012345678901234567890123456789; DROP -- characters exceed the max length are ignored -CREATE RESOURCE GROUP max012345678901234567890123456789012345678901234567890123456789further WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP max012345678901234567890123456789012345678901234567890123456789further WITH (cpu_hard_quota_limit=10); CREATE ALTER RESOURCE GROUP max012345678901234567890123456789012345678901234567890123456789are SET concurrency 2; ALTER @@ -84,7 +84,7 @@ DROP -- special characters are allowed with double quotation marks -- white spaces -CREATE RESOURCE GROUP "newlines s p a c e s t a b s" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "newlines s p a c e s t a b s" WITH (cpu_hard_quota_limit=10); CREATE ALTER RESOURCE GROUP 
"newlines s p a c e s t a b s" SET concurrency 2; ALTER @@ -98,7 +98,7 @@ t a b s | 2 DROP RESOURCE GROUP "newlines s p a c e s t a b s"; DROP -- punctuations -CREATE RESOURCE GROUP "!#$%&`()*+,-./:;<=>?@[]^_{|}~" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "!#$%&`()*+,-./:;<=>?@[]^_{|}~" WITH (cpu_hard_quota_limit=10); CREATE ALTER RESOURCE GROUP "!#$%&`()*+,-./:;<=>?@[]^_{|}~" SET concurrency 2; ALTER @@ -110,7 +110,7 @@ SELECT * FROM rg_name_view; DROP RESOURCE GROUP "!#$%&`()*+,-./:;<=>?@[]^_{|}~"; DROP -- quotation marks -CREATE RESOURCE GROUP "'' are 2 single quotation marks" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "'' are 2 single quotation marks" WITH (cpu_hard_quota_limit=10); CREATE ALTER RESOURCE GROUP "'' are 2 single quotation marks" SET concurrency 2; ALTER @@ -121,7 +121,7 @@ SELECT * FROM rg_name_view; (1 row) DROP RESOURCE GROUP "'' are 2 single quotation marks"; DROP -CREATE RESOURCE GROUP """ is 1 double quotation mark" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP """ is 1 double quotation mark" WITH (cpu_hard_quota_limit=10); CREATE ALTER RESOURCE GROUP """ is 1 double quotation mark" SET concurrency 2; ALTER @@ -134,7 +134,7 @@ DROP RESOURCE GROUP """ is 1 double quotation mark"; DROP -- nothing special with leading character -CREATE RESOURCE GROUP "0 as prefix" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "0 as prefix" WITH (cpu_hard_quota_limit=10); CREATE ALTER RESOURCE GROUP "0 as prefix" SET concurrency 2; ALTER @@ -145,7 +145,7 @@ SELECT * FROM rg_name_view; (1 row) DROP RESOURCE GROUP "0 as prefix"; DROP -CREATE RESOURCE GROUP " leading space" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP " leading space" WITH (cpu_hard_quota_limit=10); CREATE ALTER RESOURCE GROUP " leading space" SET concurrency 2; ALTER @@ -158,7 +158,7 @@ DROP RESOURCE GROUP " leading space"; DROP -- backslash is not used as the escape character -CREATE 
RESOURCE GROUP "\\ are two backslashes" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "\\ are two backslashes" WITH (cpu_hard_quota_limit=10); CREATE ALTER RESOURCE GROUP "\\ are two backslashes" SET concurrency 2; ALTER @@ -170,11 +170,11 @@ SELECT * FROM rg_name_view; DROP RESOURCE GROUP "\\ are two backslashes"; DROP -- below are octal, hex and unicode representations of "rg1" -CREATE RESOURCE GROUP "\o162\o147\o61" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "\o162\o147\o61" WITH (cpu_hard_quota_limit=10); CREATE -CREATE RESOURCE GROUP "\x72\x67\x31" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "\x72\x67\x31" WITH (cpu_hard_quota_limit=10); CREATE -CREATE RESOURCE GROUP "\u0072\u0067\u0031" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "\u0072\u0067\u0031" WITH (cpu_hard_quota_limit=10); CREATE ALTER RESOURCE GROUP "\o162\o147\o61" SET concurrency 2; ALTER @@ -202,7 +202,7 @@ DROP RESOURCE GROUP "\u0072\u0067\u0031"; DROP -- unicode escapes are supported -CREATE RESOURCE GROUP U&"\0441\043B\043E\043D" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP U&"\0441\043B\043E\043D" WITH (cpu_hard_quota_limit=10); CREATE ALTER RESOURCE GROUP U&"\0441\043B\043E\043D" SET concurrency 2; ALTER @@ -214,7 +214,7 @@ SELECT * FROM rg_name_view; DROP RESOURCE GROUP U&"\0441\043B\043E\043D"; DROP -- unicode representation of "rg1" -CREATE RESOURCE GROUP U&"\0072\0067\0031" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP U&"\0072\0067\0031" WITH (cpu_hard_quota_limit=10); CREATE ALTER RESOURCE GROUP "rg1" SET concurrency 2; ALTER @@ -227,7 +227,7 @@ DROP RESOURCE GROUP "rg1"; DROP -- CJK characters are allowed with or without double quotation marks -CREATE RESOURCE GROUP 资源组 WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP 资源组 WITH (cpu_hard_quota_limit=10); CREATE ALTER RESOURCE GROUP "资源组" SET concurrency 2; ALTER @@ -238,7 +238,7 @@ SELECT * 
FROM rg_name_view; (1 row) DROP RESOURCE GROUP 资源组; DROP -CREATE RESOURCE GROUP リソース・グループ WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP リソース・グループ WITH (cpu_hard_quota_limit=10); CREATE ALTER RESOURCE GROUP "リソース・グループ" SET concurrency 2; ALTER @@ -249,7 +249,7 @@ SELECT * FROM rg_name_view; (1 row) DROP RESOURCE GROUP リソース・グループ; DROP -CREATE RESOURCE GROUP 자원그룹 WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP 자원그룹 WITH (cpu_hard_quota_limit=10); CREATE ALTER RESOURCE GROUP "자원그룹" SET concurrency 2; ALTER @@ -263,11 +263,11 @@ DROP -- names are case sensitive, -- but are always converted to lower case unless around with quotation marks -CREATE RESOURCE GROUP "RG_NAME_TEST" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "RG_NAME_TEST" WITH (cpu_hard_quota_limit=10); CREATE -CREATE RESOURCE GROUP rg_Name_Test WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP rg_Name_Test WITH (cpu_hard_quota_limit=10); CREATE -CREATE RESOURCE GROUP "rg_name_test" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "rg_name_test" WITH (cpu_hard_quota_limit=10); ERROR: resource group "rg_name_test" already exists ALTER RESOURCE GROUP Rg_NaMe_TeSt SET concurrency 2; ALTER @@ -286,7 +286,7 @@ DROP -- reserved names are all lower case: "default_group", "admin_group", "none", -- they can be used by users with at least one upper case character. 
-CREATE RESOURCE GROUP "None" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "None" WITH (cpu_hard_quota_limit=10); CREATE ALTER RESOURCE GROUP "None" SET concurrency 2; ALTER @@ -297,7 +297,7 @@ SELECT * FROM rg_name_view; (1 row) DROP RESOURCE GROUP "None"; DROP -CREATE RESOURCE GROUP "NONE" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "NONE" WITH (cpu_hard_quota_limit=10); CREATE ALTER RESOURCE GROUP "NONE" SET concurrency 2; ALTER @@ -308,7 +308,7 @@ SELECT * FROM rg_name_view; (1 row) DROP RESOURCE GROUP "NONE"; DROP -CREATE RESOURCE GROUP "DEFAULT_GROup" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "DEFAULT_GROup" WITH (cpu_hard_quota_limit=10); CREATE ALTER RESOURCE GROUP "DEFAULT_GROup" SET concurrency 2; ALTER @@ -319,7 +319,7 @@ SELECT * FROM rg_name_view; (1 row) DROP RESOURCE GROUP "DEFAULT_GROup"; DROP -CREATE RESOURCE GROUP "ADMIN_GROUP" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "ADMIN_GROUP" WITH (cpu_hard_quota_limit=10); CREATE ALTER RESOURCE GROUP "ADMIN_GROUP" SET concurrency 2; ALTER @@ -331,7 +331,7 @@ SELECT * FROM rg_name_view; DROP RESOURCE GROUP "ADMIN_GROUP"; DROP -CREATE RESOURCE GROUP "with" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "with" WITH (cpu_hard_quota_limit=10); CREATE ALTER RESOURCE GROUP "with" SET concurrency 2; ALTER @@ -342,7 +342,7 @@ SELECT * FROM rg_name_view; (1 row) DROP RESOURCE GROUP "with"; DROP -CREATE RESOURCE GROUP "WITH" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "WITH" WITH (cpu_hard_quota_limit=10); CREATE ALTER RESOURCE GROUP "WITH" SET concurrency 2; ALTER @@ -353,7 +353,7 @@ SELECT * FROM rg_name_view; (1 row) DROP RESOURCE GROUP "WITH"; DROP -CREATE RESOURCE GROUP "group" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "group" WITH (cpu_hard_quota_limit=10); CREATE ALTER RESOURCE GROUP "group" SET concurrency 2; ALTER @@ -364,7 +364,7 @@ SELECT * FROM 
rg_name_view; (1 row) DROP RESOURCE GROUP "group"; DROP -CREATE RESOURCE GROUP "create" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "create" WITH (cpu_hard_quota_limit=10); CREATE ALTER RESOURCE GROUP "create" SET concurrency 2; ALTER @@ -381,58 +381,58 @@ DROP -- -- does not support single quotation marks around the name -CREATE RESOURCE GROUP 'must_fail' WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP 'must_fail' WITH (cpu_hard_quota_limit=10); ERROR: syntax error at or near "'must_fail'" -LINE 1: CREATE RESOURCE GROUP 'must_fail' WITH (cpu_rate_limit=10, m... +LINE 1: CREATE RESOURCE GROUP 'must_fail' WITH (cpu_hard_quota_limit... ^ -- does not support leading numbers -CREATE RESOURCE GROUP 0_must_fail WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP 0_must_fail WITH (cpu_hard_quota_limit=10); ERROR: syntax error at or near "0" -LINE 1: CREATE RESOURCE GROUP 0_must_fail WITH (cpu_rate_limit=10, m... +LINE 1: CREATE RESOURCE GROUP 0_must_fail WITH (cpu_hard_quota_limit... 
^ -- reserved names are not allowed even with double quotation marks -CREATE RESOURCE GROUP "default_group" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "default_group" WITH (cpu_hard_quota_limit=10); ERROR: resource group "default_group" already exists -CREATE RESOURCE GROUP "admin_group" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "admin_group" WITH (cpu_hard_quota_limit=10); ERROR: resource group "admin_group" already exists -CREATE RESOURCE GROUP "none" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "none" WITH (cpu_hard_quota_limit=10); ERROR: resource group name "none" is reserved -CREATE RESOURCE GROUP default_group WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP default_group WITH (cpu_hard_quota_limit=10); ERROR: resource group "default_group" already exists -CREATE RESOURCE GROUP admin_group WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP admin_group WITH (cpu_hard_quota_limit=10); ERROR: resource group "admin_group" already exists -CREATE RESOURCE GROUP none WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP none WITH (cpu_hard_quota_limit=10); ERROR: resource group name "none" is reserved -CREATE RESOURCE GROUP DEFAULT_GROUP WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP DEFAULT_GROUP WITH (cpu_hard_quota_limit=10); ERROR: resource group "default_group" already exists -CREATE RESOURCE GROUP Admin_Group WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP Admin_Group WITH (cpu_hard_quota_limit=10); ERROR: resource group "admin_group" already exists -CREATE RESOURCE GROUP NONE WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP NONE WITH (cpu_hard_quota_limit=10); ERROR: resource group name "none" is reserved -- keywords are not allowed without quotation marks -CREATE RESOURCE GROUP with WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP with WITH (cpu_hard_quota_limit=10); ERROR: 
syntax error at or near "with" -LINE 1: CREATE RESOURCE GROUP with WITH (cpu_rate_limit=10, memory_l... +LINE 1: CREATE RESOURCE GROUP with WITH (cpu_hard_quota_limit=10); ^ -CREATE RESOURCE GROUP WITH WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP WITH WITH (cpu_hard_quota_limit=10); ERROR: syntax error at or near "WITH" -LINE 1: CREATE RESOURCE GROUP WITH WITH (cpu_rate_limit=10, memory_l... +LINE 1: CREATE RESOURCE GROUP WITH WITH (cpu_hard_quota_limit=10); ^ -CREATE RESOURCE GROUP group WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP group WITH (cpu_hard_quota_limit=10); ERROR: syntax error at or near "group" -LINE 1: CREATE RESOURCE GROUP group WITH (cpu_rate_limit=10, memory_... +LINE 1: CREATE RESOURCE GROUP group WITH (cpu_hard_quota_limit=10); ^ -CREATE RESOURCE GROUP CREATE WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP CREATE WITH (cpu_hard_quota_limit=10); ERROR: syntax error at or near "CREATE" -LINE 1: CREATE RESOURCE GROUP CREATE WITH (cpu_rate_limit=10, memory... +LINE 1: CREATE RESOURCE GROUP CREATE WITH (cpu_hard_quota_limit=10); ^ -- min length is 1 character -CREATE RESOURCE GROUP "" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "" WITH (cpu_hard_quota_limit=10); ERROR: zero-length delimited identifier at or near """" -LINE 1: CREATE RESOURCE GROUP "" WITH (cpu_rate_limit=10, memory_lim... 
+LINE 1: CREATE RESOURCE GROUP "" WITH (cpu_hard_quota_limit=10); ^ diff --git a/src/test/isolation2/expected/resgroup/resgroup_operator_memory.out b/src/test/isolation2/expected/resgroup/resgroup_operator_memory.out index ae5aedfc26f..ee0d64f6eea 100644 --- a/src/test/isolation2/expected/resgroup/resgroup_operator_memory.out +++ b/src/test/isolation2/expected/resgroup/resgroup_operator_memory.out @@ -34,7 +34,7 @@ CREATE -- - rg1's memory quota is 682 * 1% = 6; -- - per-xact quota is 6/3=2; -- - spill memory is 2 * 60% = 1; -CREATE RESOURCE GROUP rg1_opmem_test WITH (cpu_rate_limit=10, memory_limit=1, memory_shared_quota=0, concurrency=3, memory_spill_ratio=60); +CREATE RESOURCE GROUP rg1_opmem_test WITH (cpu_hard_quota_limit=10, memory_limit=1, memory_shared_quota=0, concurrency=3, memory_spill_ratio=60); CREATE CREATE ROLE r1_opmem_test RESOURCE GROUP rg1_opmem_test; @@ -56,7 +56,7 @@ SET SET ROLE TO r1_opmem_test; SET SELECT * FROM many_ops; - groupid | groupname | concurrency | cpu_rate_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset + groupid | groupname | concurrency | cpu_hard_quota_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ---------+-----------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- (0 rows) RESET role; @@ -67,7 +67,7 @@ SET SET ROLE TO r1_opmem_test; SET SELECT * FROM many_ops; - groupid | groupname | concurrency | cpu_rate_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset + groupid | groupname | concurrency | cpu_hard_quota_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ---------+-----------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- (0 rows) RESET role; @@ -78,7 +78,7 @@ SET SET ROLE TO r1_opmem_test; SET SELECT * FROM many_ops; - groupid 
| groupname | concurrency | cpu_rate_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset + groupid | groupname | concurrency | cpu_hard_quota_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ---------+-----------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- (0 rows) RESET role; @@ -90,7 +90,7 @@ RESET -- rg1 has no group level shared memory, and most memory are granted to rg2, -- there is only very little global shared memory due to integer rounding. -CREATE RESOURCE GROUP rg2_opmem_test WITH (cpu_rate_limit=10, memory_limit=59); +CREATE RESOURCE GROUP rg2_opmem_test WITH (cpu_hard_quota_limit=10, memory_limit=59); CREATE -- this query can execute but will raise OOM error. @@ -143,7 +143,7 @@ SET SET ROLE TO r1_opmem_test; SET SELECT * FROM many_ops; - groupid | groupname | concurrency | cpu_rate_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset + groupid | groupname | concurrency | cpu_hard_quota_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ---------+-----------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- (0 rows) RESET role; @@ -154,7 +154,7 @@ SET SET ROLE TO r1_opmem_test; SET SELECT * FROM many_ops; - groupid | groupname | concurrency | cpu_rate_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset + groupid | groupname | concurrency | cpu_hard_quota_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ---------+-----------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- (0 rows) RESET role; @@ -165,7 +165,7 @@ SET SET ROLE TO r1_opmem_test; SET SELECT * FROM many_ops; - groupid | groupname | concurrency | cpu_rate_limit | 
memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset + groupid | groupname | concurrency | cpu_hard_quota_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ---------+-----------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- (0 rows) RESET role; @@ -191,7 +191,7 @@ SET SET ROLE TO r1_opmem_test; SET SELECT * FROM many_ops; - groupid | groupname | concurrency | cpu_rate_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset + groupid | groupname | concurrency | cpu_hard_quota_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ---------+-----------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- (0 rows) RESET role; @@ -202,7 +202,7 @@ SET SET ROLE TO r1_opmem_test; SET SELECT * FROM many_ops; - groupid | groupname | concurrency | cpu_rate_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset + groupid | groupname | concurrency | cpu_hard_quota_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ---------+-----------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- (0 rows) RESET role; @@ -213,7 +213,7 @@ SET SET ROLE TO r1_opmem_test; SET SELECT * FROM many_ops; - groupid | groupname | concurrency | cpu_rate_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset + groupid | groupname | concurrency | cpu_hard_quota_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ---------+-----------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- (0 rows) RESET role; @@ -231,7 +231,7 @@ SET SET ROLE TO r1_opmem_test; SET SELECT * 
FROM many_ops; - groupid | groupname | concurrency | cpu_rate_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset + groupid | groupname | concurrency | cpu_hard_quota_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ---------+-----------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- (0 rows) SELECT f1_opmem_test(); @@ -247,7 +247,7 @@ SET SET ROLE TO r1_opmem_test; SET SELECT * FROM many_ops; - groupid | groupname | concurrency | cpu_rate_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset + groupid | groupname | concurrency | cpu_hard_quota_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ---------+-----------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- (0 rows) SELECT f1_opmem_test(); @@ -263,7 +263,7 @@ SET SET ROLE TO r1_opmem_test; SET SELECT * FROM many_ops; - groupid | groupname | concurrency | cpu_rate_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset + groupid | groupname | concurrency | cpu_hard_quota_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ---------+-----------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- (0 rows) SELECT f1_opmem_test(); diff --git a/src/test/isolation2/expected/resgroup/resgroup_parallel_queries.out b/src/test/isolation2/expected/resgroup/resgroup_parallel_queries.out index 53e7b03dacb..1b30bfe2ada 100644 --- a/src/test/isolation2/expected/resgroup/resgroup_parallel_queries.out +++ b/src/test/isolation2/expected/resgroup/resgroup_parallel_queries.out @@ -47,12 +47,12 @@ CREATE OK (1 row) -1>:select exec_commands_n('dblink_rg_test1','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, 
cpu_rate_limit=#, memory_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER RESOURCE GROUP rg_test_g# set concurrency #', 60, '', '1-6', false); -2>:select exec_commands_n('dblink_rg_test2','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, cpu_rate_limit=#, memory_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER RESOURCE GROUP rg_test_g# set concurrency#', 60, '', '1-6', false); -3>:select exec_commands_n('dblink_rg_test3','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, cpu_rate_limit=#, memory_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER RESOURCE GROUP rg_test_g# set cpu_rate_limit #', 60, '', '1-6', false); -4>:select exec_commands_n('dblink_rg_test4','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, cpu_rate_limit=#, memory_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER RESOURCE GROUP rg_test_g# set memory_limit #', 60, '', '1-6', false); -5>:select exec_commands_n('dblink_rg_test5','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, cpu_rate_limit=#, memory_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER RESOURCE GROUP rg_test_g# set memory_shared_quota #', 60, '', '1-6', false); -6>:select exec_commands_n('dblink_rg_test6','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, cpu_rate_limit=#, memory_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER RESOURCE GROUP rg_test_g# set memory_limit #', 60, '', '1-6', false); +1>:select exec_commands_n('dblink_rg_test1','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, cpu_hard_quota_limit=#, memory_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER RESOURCE GROUP rg_test_g# set concurrency #', 60, '', '1-6', false); +2>:select exec_commands_n('dblink_rg_test2','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, cpu_hard_quota_limit=#, memory_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER RESOURCE GROUP rg_test_g# set concurrency#', 60, '', '1-6', false); +3>:select exec_commands_n('dblink_rg_test3','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, 
cpu_hard_quota_limit=#, memory_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER RESOURCE GROUP rg_test_g# set cpu_hard_quota_limit #', 60, '', '1-6', false); +4>:select exec_commands_n('dblink_rg_test4','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, cpu_hard_quota_limit=#, memory_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER RESOURCE GROUP rg_test_g# set memory_limit #', 60, '', '1-6', false); +5>:select exec_commands_n('dblink_rg_test5','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, cpu_hard_quota_limit=#, memory_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER RESOURCE GROUP rg_test_g# set memory_shared_quota #', 60, '', '1-6', false); +6>:select exec_commands_n('dblink_rg_test6','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, cpu_hard_quota_limit=#, memory_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER RESOURCE GROUP rg_test_g# set memory_limit #', 60, '', '1-6', false); 1<: <... completed> exec_commands_n @@ -155,7 +155,7 @@ select exec_commands_n('dblink_rg_test','DROP RESOURCE GROUP rg_test_g%', '', '' -- end_ignore -- create 6 roles and 6 resource groups -select exec_commands_n('dblink_rg_test','CREATE RESOURCE GROUP rg_test_g% WITH (concurrency=9, cpu_rate_limit=1, memory_limit=7)', '', '', 6, '1-6', '', true); +select exec_commands_n('dblink_rg_test','CREATE RESOURCE GROUP rg_test_g% WITH (concurrency=9, cpu_hard_quota_limit=1, memory_limit=7)', '', '', 6, '1-6', '', true); exec_commands_n ----------------- 6 @@ -182,8 +182,8 @@ select dblink_disconnect('dblink_rg_test'); OK (1 row) -select groupname, concurrency, cpu_rate_limit from gp_toolkit.gp_resgroup_config where groupname like 'rg_test_g%' order by groupname; - groupname | concurrency | cpu_rate_limit +select groupname, concurrency, cpu_hard_quota_limit from gp_toolkit.gp_resgroup_config where groupname like 'rg_test_g%' order by groupname; + groupname | concurrency | cpu_hard_quota_limit ------------+-------------+---------------- rg_test_g1 | 9 | 1 
rg_test_g2 | 9 | 1 @@ -251,13 +251,13 @@ select groupname, concurrency, cpu_rate_limit from gp_toolkit.gp_resgroup_config (1 row) 31>: select exec_commands_n('dblink_rg_test31', 'alter resource group rg_test_g% set concurrency #', 'select 1 from pg_sleep(0.1)', '', 1000, '1-6', '0-5', true); --- start a new session to alter cpu_rate_limit randomly +-- start a new session to alter cpu_hard_quota_limit randomly 32: select dblink_connect('dblink_rg_test32', 'dbname=isolation2resgrouptest'); dblink_connect ---------------- OK (1 row) -32>: select exec_commands_n('dblink_rg_test32', 'alter resource group rg_test_g% set cpu_rate_limit #', 'select 1 from pg_sleep(0.1)', '', 1000, '1-6', '1-6', true); +32>: select exec_commands_n('dblink_rg_test32', 'alter resource group rg_test_g% set cpu_hard_quota_limit #', 'select 1 from pg_sleep(0.1)', '', 1000, '1-6', '1-6', true); -- start a new session to alter memory_limit randomly 33: select dblink_connect('dblink_rg_test33', 'dbname=isolation2resgrouptest'); @@ -292,7 +292,7 @@ select groupname, concurrency, cpu_rate_limit from gp_toolkit.gp_resgroup_config ---------------- OK (1 row) -42>: select exec_commands_n('dblink_rg_test42', 'create resource group rg_test_g7 with (cpu_rate_limit=1, memory_limit=1)', 'drop resource group rg_test_g7', '', 1000, '', '', true); +42>: select exec_commands_n('dblink_rg_test42', 'create resource group rg_test_g7 with (cpu_hard_quota_limit=1, memory_limit=1)', 'drop resource group rg_test_g7', '', 1000, '', '', true); 31<: <... completed> exec_commands_n @@ -454,7 +454,7 @@ select groupname, concurrency, cpu_rate_limit from gp_toolkit.gp_resgroup_config 41q: ... 42q: ... 
-select groupname, concurrency::int < 7, cpu_rate_limit::int < 7 from gp_toolkit.gp_resgroup_config where groupname like 'rg_test_g%' order by groupname; +select groupname, concurrency::int < 7, cpu_hard_quota_limit::int < 7 from gp_toolkit.gp_resgroup_config where groupname like 'rg_test_g%' order by groupname; groupname | ?column? | ?column? ------------+----------+---------- rg_test_g1 | t | t @@ -519,7 +519,7 @@ select dblink_disconnect('dblink_rg_test'); -- -- 5*: Test connections in utility mode are not governed by resource group -- -create resource group rg_test_g8 with (concurrency= 1, cpu_rate_limit=1, memory_limit=1); +create resource group rg_test_g8 with (concurrency= 1, cpu_hard_quota_limit=1, memory_limit=1); CREATE create role rg_test_r8 login resource group rg_test_g8; CREATE @@ -591,7 +591,7 @@ DROP -- clean up select * from gp_toolkit.gp_resgroup_config; - groupid | groupname | concurrency | proposed_concurrency | cpu_rate_limit | memory_limit | proposed_memory_limit | memory_shared_quota | proposed_memory_shared_quota | memory_spill_ratio | proposed_memory_spill_ratio + groupid | groupname | concurrency | proposed_concurrency | cpu_hard_quota_limit | memory_limit | proposed_memory_limit | memory_shared_quota | proposed_memory_shared_quota | memory_spill_ratio | proposed_memory_spill_ratio ---------+---------------+-------------+----------------------+----------------+--------------+-----------------------+---------------------+------------------------------+--------------------+----------------------------- 6437 | default_group | 20 | 20 | 30 | 30 | 30 | 50 | 50 | 20 | 20 6438 | admin_group | 40 | 40 | 10 | 10 | 10 | 50 | 50 | 20 | 20 diff --git a/src/test/isolation2/expected/resgroup/resgroup_recreate.out b/src/test/isolation2/expected/resgroup/resgroup_recreate.out index 0c6560835cb..4db5d41f27b 100644 --- a/src/test/isolation2/expected/resgroup/resgroup_recreate.out +++ b/src/test/isolation2/expected/resgroup/resgroup_recreate.out @@ -5,7 +5,7 
@@ DROP RESOURCE GROUP rg1; ERROR: resource group "rg1" does not exist -- end_ignore -CREATE RESOURCE GROUP rg1 WITH (concurrency=2, cpu_rate_limit=10, memory_limit=50, memory_shared_quota=0); +CREATE RESOURCE GROUP rg1 WITH (concurrency=2, cpu_hard_quota_limit=10); CREATE CREATE ROLE r1 RESOURCE GROUP rg1; CREATE @@ -21,7 +21,7 @@ ALTER ROLE r1 RESOURCE GROUP none; ALTER DROP RESOURCE GROUP rg1; DROP -CREATE RESOURCE GROUP rg1 WITH (concurrency=2, cpu_rate_limit=10, memory_limit=50, memory_shared_quota=0); +CREATE RESOURCE GROUP rg1 WITH (concurrency=2, cpu_hard_quota_limit=10); CREATE ALTER ROLE r1 RESOURCE GROUP rg1; ALTER diff --git a/src/test/isolation2/expected/resgroup/resgroup_seg_down_2pc.out b/src/test/isolation2/expected/resgroup/resgroup_seg_down_2pc.out index 04d7824f205..16669ceeee1 100644 --- a/src/test/isolation2/expected/resgroup/resgroup_seg_down_2pc.out +++ b/src/test/isolation2/expected/resgroup/resgroup_seg_down_2pc.out @@ -16,7 +16,7 @@ select pg_reload_conf(); t (1 row) -1:create resource group rgroup_seg_down with (CPU_RATE_LIMIT=35, MEMORY_LIMIT=35, CONCURRENCY=10); +1:create resource group rgroup_seg_down with (cpu_hard_quota_limit=35, CONCURRENCY=10); CREATE -- inject an error in function dtm_broadcast_commit_prepared, that is before QD broadcasts commit prepared command to QEs diff --git a/src/test/isolation2/expected/resgroup/resgroup_set_memory_spill_ratio.out b/src/test/isolation2/expected/resgroup/resgroup_set_memory_spill_ratio.out deleted file mode 100644 index f3adbc83c4a..00000000000 --- a/src/test/isolation2/expected/resgroup/resgroup_set_memory_spill_ratio.out +++ /dev/null @@ -1,197 +0,0 @@ --- This query must be the first one in this case. --- SHOW command will be bypassed in resgroup, when it's the first command --- in a connection it needs special handling to show memory_spill_ratio --- correctly. Verify that it shows the correct value 10 instead of default 20. 
-SHOW memory_spill_ratio; - memory_spill_ratio --------------------- - 10 -(1 row) - ---start_ignore -DROP ROLE role1_spill_test; -ERROR: role "role1_spill_test" does not exist -DROP ROLE role2_spill_test; -ERROR: role "role2_spill_test" does not exist -DROP RESOURCE GROUP rg1_spill_test; -ERROR: resource group "rg1_spill_test" does not exist -DROP RESOURCE GROUP rg2_spill_test; -ERROR: resource group "rg2_spill_test" does not exist ---end_ignore - -CREATE RESOURCE GROUP rg1_spill_test WITH (CONCURRENCY=10, MEMORY_LIMIT=10, CPU_RATE_LIMIT=10, memory_shared_quota=20, memory_spill_ratio=30); -CREATE -CREATE RESOURCE GROUP rg2_spill_test WITH (CONCURRENCY=10, MEMORY_LIMIT=10, CPU_RATE_LIMIT=10, memory_shared_quota=50, memory_spill_ratio=10); -CREATE -CREATE ROLE role1_spill_test RESOURCE GROUP rg1_spill_test; -CREATE -CREATE ROLE role2_spill_test RESOURCE GROUP rg2_spill_test; -CREATE - --- positive set to resource group level ---start_ignore -SET ROLE role1_spill_test; -SET -SHOW MEMORY_SPILL_RATIO; - memory_spill_ratio --------------------- - 30 -(1 row) -SELECT 1; - ?column? ----------- - 1 -(1 row) ---end_ignore - --- positive set to session level -SET MEMORY_SPILL_RATIO TO 70; -SET -SHOW MEMORY_SPILL_RATIO; - memory_spill_ratio --------------------- - 70 -(1 row) -SELECT 1; - ?column? ----------- - 1 -(1 row) - --- positive fallback to statement_mem at session level -SET MEMORY_SPILL_RATIO TO 0; -SET -SHOW MEMORY_SPILL_RATIO; - memory_spill_ratio --------------------- - 0 -(1 row) -SELECT 1; - ?column? ----------- - 1 -(1 row) - --- negative set to session level -SET MEMORY_SPILL_RATIO TO 101; -ERROR: 101 is outside the valid range for parameter "memory_spill_ratio" (0 .. 100) -SHOW MEMORY_SPILL_RATIO; - memory_spill_ratio --------------------- - 0 -(1 row) -SELECT 1; - ?column? 
----------- - 1 -(1 row) - --- positive set to session level -SET MEMORY_SPILL_RATIO TO 90; -SET -SHOW MEMORY_SPILL_RATIO; - memory_spill_ratio --------------------- - 90 -(1 row) -SELECT 1; - ?column? ----------- - 1 -(1 row) - --- positive set to session level -SET MEMORY_SPILL_RATIO TO 20; -SET -SHOW MEMORY_SPILL_RATIO; - memory_spill_ratio --------------------- - 20 -(1 row) -SELECT 1; - ?column? ----------- - 1 -(1 row) - --- reset to resource group level -RESET MEMORY_SPILL_RATIO; -RESET -SHOW MEMORY_SPILL_RATIO; - memory_spill_ratio --------------------- - 30 -(1 row) -SELECT 1; - ?column? ----------- - 1 -(1 row) - --- positive set to session level -SET MEMORY_SPILL_RATIO TO 60; -SET -SHOW MEMORY_SPILL_RATIO; - memory_spill_ratio --------------------- - 60 -(1 row) -SELECT 1; - ?column? ----------- - 1 -(1 row) - --- change role, positive for session level -SET ROLE role2_spill_test; -SET -SHOW MEMORY_SPILL_RATIO; - memory_spill_ratio --------------------- - 60 -(1 row) -SELECT 1; - ?column? ----------- - 1 -(1 row) - --- positive set to session level -SET MEMORY_SPILL_RATIO TO 20; -SET -SHOW MEMORY_SPILL_RATIO; - memory_spill_ratio --------------------- - 20 -(1 row) -SELECT 1; - ?column? ----------- - 1 -(1 row) - --- reset to resource group level -RESET MEMORY_SPILL_RATIO; -RESET -SHOW MEMORY_SPILL_RATIO; - memory_spill_ratio --------------------- - 10 -(1 row) -SELECT 1; - ?column? 
----------- - 1 -(1 row) - --- cleanup -RESET ROLE; -RESET -DROP ROLE role1_spill_test; -DROP -DROP ROLE role2_spill_test; -DROP -DROP RESOURCE GROUP rg1_spill_test; -DROP -DROP RESOURCE GROUP rg2_spill_test; -DROP diff --git a/src/test/isolation2/expected/resgroup/resgroup_syntax.out b/src/test/isolation2/expected/resgroup/resgroup_syntax.out index 4edaa812a9f..66e7d9ff0e5 100644 --- a/src/test/isolation2/expected/resgroup/resgroup_syntax.out +++ b/src/test/isolation2/expected/resgroup/resgroup_syntax.out @@ -94,81 +94,82 @@ ERROR: resource group "rg_test_group" does not exist --end_ignore SELECT * FROM gp_toolkit.gp_resgroup_config; - groupid | groupname | concurrency | cpu_rate_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ----------+---------------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- - 6437 | default_group | 20 | 20 | 30 | 80 | 10 | vmtracker | -1 - 6441 | system_group | 0 | 10 | 0 | 0 | 0 | vmtracker | -1 - 6438 | admin_group | 2 | 10 | 10 | 80 | 10 | vmtracker | -1 + groupid | groupname | concurrency | cpu_hard_quota_limit | cpu_soft_priority | cpuset +---------+---------------+-------------+----------------------+-------------------+-------- + 6437 | default_group | 20 | 20 | 100 | -1 + 6441 | system_group | 0 | 10 | 100 | -1 + 6438 | admin_group | 2 | 10 | 100 | -1 (3 rows) -- negative -- can't create the reserved resource groups -CREATE RESOURCE GROUP default_group WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP default_group WITH (cpu_hard_quota_limit=10); ERROR: resource group "default_group" already exists -CREATE RESOURCE GROUP admin_group WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP admin_group WITH (cpu_hard_quota_limit=10); ERROR: resource group "admin_group" already exists -CREATE RESOURCE GROUP none WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP none WITH 
(cpu_hard_quota_limit=10); ERROR: resource group name "none" is reserved + -- multiple resource groups can't share the same name -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=10); CREATE -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=10); ERROR: resource group "rg_test_group" already exists DROP RESOURCE GROUP rg_test_group; DROP --- must specify cpu_rate_limit or cpuset -CREATE RESOURCE GROUP rg_test_group WITH (memory_limit=10); -ERROR: must specify cpu_rate_limit or cpuset + -- can't specify the resource limit type multiple times -CREATE RESOURCE GROUP rg_test_group WITH (concurrency=1, cpu_rate_limit=5, memory_limit=5, concurrency=1); +CREATE RESOURCE GROUP rg_test_group WITH (concurrency=1, cpu_hard_quota_limit=5, concurrency=1); ERROR: found duplicate resource group resource type: concurrency -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=5, memory_limit=5, cpu_rate_limit=5); -ERROR: found duplicate resource group resource type: cpu_rate_limit -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=5, memory_limit=5, memory_limit=5); -ERROR: found duplicate resource group resource type: memory_limit -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=5, memory_limit=5, memory_shared_quota=70, memory_shared_quota=80); -ERROR: found duplicate resource group resource type: memory_shared_quota -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0', cpuset='0', memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=5, cpu_hard_quota_limit=5); +ERROR: found duplicate resource group resource type: cpu_hard_quota_limit +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0', cpuset='0'); ERROR: found duplicate resource group resource type: cpuset --- can't specify both cpu_rate_limit and cpuset -CREATE RESOURCE GROUP rg_test_group 
WITH (cpu_rate_limit=5, cpuset='0', memory_limit=5); -ERROR: can't specify both cpu_rate_limit and cpuset + +-- can't specify both cpu_hard_quota_limit and cpuset +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=5, cpuset='0'); +ERROR: can't specify both cpu_hard_quota_limit and cpuset + +-- cpu_soft_priority can't be negative value +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=5, cpu_soft_priority=-100); +ERROR: cpu_soft_priority range is [1, +∞] + -- can't specify invalid cpuset -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='', memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset=''); ERROR: cpuset invalid -CREATE RESOURCE GROUP rg_test_group WITH (cpuset=',', memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset=','); ERROR: cpuset invalid -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='-', memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='-'); ERROR: cpuset invalid -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='a', memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='a'); ERROR: cpuset invalid -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='12a', memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='12a'); ERROR: cpuset invalid -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0-,', memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0-,'); ERROR: cpuset invalid -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='-1', memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='-1'); ERROR: cpuset invalid -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='3-1', memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='3-1'); ERROR: cpuset invalid -CREATE RESOURCE GROUP rg_test_group WITH (cpuset=' 0 ', memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset=' 0 '); ERROR: cpuset invalid -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='4;a', memory_limit=5); +CREATE RESOURCE GROUP rg_test_group 
WITH (cpuset='4;a'); ERROR: cpuset invalid -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='-;4', memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='-;4'); ERROR: cpuset invalid -CREATE RESOURCE GROUP rg_test_group WITH (cpuset=';5', memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset=';5'); ERROR: cpuset invalid -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='5;', memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='5;'); ERROR: cpuset invalid ---- suppose the core numbered 1024 is not exist -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='1024', memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='1024'); ERROR: cpu cores 1024 are unavailable on the system -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,', memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH 
(cpuset='0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,'); ERROR: the length of cpuset reached the upper limit 1024 -- can't alter to invalid cpuset -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0', memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0'); CREATE ALTER RESOURCE GROUP rg_test_group set CPUSET ''; ERROR: cpuset invalid @@ -221,48 +222,66 @@ DROP RESOURCE GROUP none; ERROR: resource group "none" does not exist -- positive -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=10); CREATE -SELECT groupname,concurrency,cpu_rate_limit,memory_limit,memory_shared_quota,memory_spill_ratio FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; - groupname | concurrency | cpu_rate_limit | memory_limit | memory_shared_quota | memory_spill_ratio ----------------+-------------+----------------+--------------+---------------------+-------------------- - rg_test_group | 20 | 10 | 10 | 
80 | 0 +SELECT groupname,concurrency,cpu_hard_quota_limit, cpu_soft_priority FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; + groupname | concurrency | cpu_hard_quota_limit | cpu_soft_priority +---------------+-------------+----------------------+------------------- + rg_test_group | 20 | 10 | 100 (1 row) DROP RESOURCE GROUP rg_test_group; DROP -CREATE RESOURCE GROUP rg_test_group WITH (concurrency=1, cpuset='0', memory_limit=10, memory_shared_quota=70, memory_spill_ratio=30); +CREATE RESOURCE GROUP rg_test_group WITH (concurrency=1, cpuset='0'); CREATE -SELECT groupname,concurrency,cpu_rate_limit,memory_limit,memory_shared_quota,memory_spill_ratio FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; - groupname | concurrency | cpu_rate_limit | memory_limit | memory_shared_quota | memory_spill_ratio ----------------+-------------+----------------+--------------+---------------------+-------------------- - rg_test_group | 1 | -1 | 10 | 70 | 30 +SELECT groupname,concurrency,cpu_hard_quota_limit, cpu_soft_priority FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; + groupname | concurrency | cpu_hard_quota_limit | cpu_soft_priority +---------------+-------------+----------------------+------------------- + rg_test_group | 1 | -1 | 100 (1 row) DROP RESOURCE GROUP rg_test_group; DROP -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10); +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=10, cpu_soft_priority=1000); CREATE -SELECT groupname,concurrency,cpu_rate_limit,memory_limit,memory_shared_quota,memory_spill_ratio FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; - groupname | concurrency | cpu_rate_limit | memory_limit | memory_shared_quota | memory_spill_ratio ----------------+-------------+----------------+--------------+---------------------+-------------------- - rg_test_group | 20 | 10 | 0 | 80 | 0 +SELECT groupname,concurrency,cpu_hard_quota_limit, cpu_soft_priority 
FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; + groupname | concurrency | cpu_hard_quota_limit | cpu_soft_priority +---------------+-------------+----------------------+------------------- + rg_test_group | 20 | 10 | 1000 +(1 row) +DROP RESOURCE GROUP rg_test_group; +DROP +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=-1, cpu_soft_priority=1000); +CREATE +SELECT groupname,concurrency,cpu_hard_quota_limit, cpu_soft_priority FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; + groupname | concurrency | cpu_hard_quota_limit | cpu_soft_priority +---------------+-------------+----------------------+------------------- + rg_test_group | 20 | -1 | 1000 +(1 row) +DROP RESOURCE GROUP rg_test_group; +DROP +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0', cpu_soft_priority=1000); +CREATE +SELECT groupname,concurrency,cpu_hard_quota_limit, cpu_soft_priority FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; + groupname | concurrency | cpu_hard_quota_limit | cpu_soft_priority +---------------+-------------+----------------------+------------------- + rg_test_group | 20 | -1 | 1000 (1 row) DROP RESOURCE GROUP rg_test_group; DROP CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0'); CREATE -SELECT groupname,concurrency,cpu_rate_limit,memory_limit,memory_shared_quota,memory_spill_ratio FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; - groupname | concurrency | cpu_rate_limit | memory_limit | memory_shared_quota | memory_spill_ratio ----------------+-------------+----------------+--------------+---------------------+-------------------- - rg_test_group | 20 | -1 | 0 | 80 | 0 +SELECT groupname,concurrency,cpu_hard_quota_limit,cpu_soft_priority FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; + groupname | concurrency | cpu_hard_quota_limit | cpu_soft_priority +---------------+-------------+----------------------+------------------- + rg_test_group | 20 | -1 | 100 (1 
row) DROP RESOURCE GROUP rg_test_group; DROP CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0;0-1'); CREATE -SELECT groupname,concurrency,cpu_rate_limit,memory_limit,memory_shared_quota,memory_spill_ratio FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; - groupname | concurrency | cpu_rate_limit | memory_limit | memory_shared_quota | memory_spill_ratio ----------------+-------------+----------------+--------------+---------------------+-------------------- - rg_test_group | 20 | -1 | 0 | 80 | 0 +SELECT groupname,concurrency,cpu_hard_quota_limit,cpu_soft_priority,cpuset FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; + groupname | concurrency | cpu_hard_quota_limit | cpu_soft_priority | cpuset +---------------+-------------+----------------------+-------------------+-------- + rg_test_group | 20 | -1 | 100 | 0;0-1 (1 row) DROP RESOURCE GROUP rg_test_group; DROP @@ -270,173 +289,84 @@ DROP -- Test: boundary check in create resource group syntax -- ---------------------------------------------------------------------- --- negative: cpu_rate_limit & memory_limit should be in [1, 100] --- if cpu_rate_limit equals -1, it will not be involved in sum -CREATE RESOURCE GROUP rg_test_group1 WITH (memory_limit=10, cpuset='0'); -CREATE -CREATE RESOURCE GROUP rg_test_group2 WITH (cpu_rate_limit=60, memory_limit=10); -CREATE -CREATE RESOURCE GROUP rg_test_group3 WITH (cpu_rate_limit=1, memory_limit=10); -ERROR: total cpu_rate_limit exceeded the limit of 100 -DROP RESOURCE GROUP rg_test_group1; -DROP -DROP RESOURCE GROUP rg_test_group2; -DROP -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=61, memory_limit=10); -ERROR: total cpu_rate_limit exceeded the limit of 100 -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=91); -ERROR: total memory_limit exceeded the limit of 100 -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=0, memory_limit=10); -ERROR: cpu_rate_limit range is [1, 100] -CREATE RESOURCE 
GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=-1); -ERROR: memory_limit range is [0, 100] -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=0.9); -ERROR: invalid input syntax for type bigint: "0.9" -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=1.9); -ERROR: invalid input syntax for type bigint: "1.9" +-- negative: cpu_hard_quota_limit should be in [1, 100] +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=101); +ERROR: cpu_hard_quota_limit range is [1, 100] or equals to -1 +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=0); +ERROR: cpu_hard_quota_limit range is [1, 100] or equals to -1 + -- negative: concurrency should be in [1, max_connections] -CREATE RESOURCE GROUP rg_test_group WITH (concurrency=-1, cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP rg_test_group WITH (concurrency=-1, cpu_hard_quota_limit=10); ERROR: concurrency range is [0, 'max_connections'] -CREATE RESOURCE GROUP rg_test_group WITH (concurrency=26, cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP rg_test_group WITH (concurrency=26, cpu_hard_quota_limit=10); ERROR: concurrency range is [0, 'max_connections'] --- negative: memory_auditor should be 'vmtracker' or 'cgroup' -CREATE RESOURCE GROUP rg_test_group WITH (concurrency=0, cpu_rate_limit=10, memory_limit=10, memory_auditor="randomtext"); -ERROR: memory_auditor should be "vmtracker" or "cgroup" --- negative: concurrency should be zero for cgroup audited resource group -CREATE RESOURCE GROUP rg_test_group WITH (concurrency=1, cpu_rate_limit=10, memory_limit=10, memory_auditor="cgroup"); -ERROR: resource group concurrency must be 0 when group memory_auditor is cgroup + -- negative: the cores of cpuset in different groups mustn't overlap -CREATE RESOURCE GROUP rg_test_group1 WITH (cpuset='0', memory_limit=10); +CREATE RESOURCE GROUP rg_test_group1 WITH (cpuset='0'); CREATE -CREATE RESOURCE GROUP rg_test_group2 WITH (cpuset='0', 
memory_limit=10); +CREATE RESOURCE GROUP rg_test_group2 WITH (cpuset='0'); ERROR: cpu cores 0 are used by resource group rg_test_group1 DROP RESOURCE GROUP rg_test_group1; DROP --- memory_spill_ratio range is [0, 100] --- no limit on the sum of memory_shared_quota and memory_spill_ratio -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_shared_quota=10, memory_spill_ratio=0); -CREATE -DROP RESOURCE GROUP rg_test_group; -DROP -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_shared_quota=50, memory_spill_ratio=51); +-- negative: cpu_soft_priority should be in [1, +∞] +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=10, cpu_soft_priority=0); +ERROR: cpu_soft_priority range is [1, +∞] +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=10, cpu_soft_priority=-1); +ERROR: cpu_soft_priority range is [1, +∞] +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=10, cpu_soft_priority=-1024); +ERROR: cpu_soft_priority range is [1, +∞] + +-- positive: cpu_hard_quota_limit should be in [1, 100] +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=60); CREATE DROP RESOURCE GROUP rg_test_group; DROP -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_shared_quota=10, memory_spill_ratio=-1); -ERROR: memory_spill_ratio range is [0, 100] -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_shared_quota=-1, memory_spill_ratio=10); -ERROR: memory_shared_quota range is [0, 100] - --- positive: cpu_rate_limit & memory_limit should be in [1, 100] -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=60, memory_limit=10); +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=1); CREATE DROP RESOURCE GROUP rg_test_group; DROP -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=60); +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=10); CREATE DROP 
RESOURCE GROUP rg_test_group; DROP -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=1, memory_limit=10); + +-- positive: cpu_soft_priority should be in [1, +∞] +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=10, cpu_soft_priority=100); CREATE DROP RESOURCE GROUP rg_test_group; DROP -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=1); +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=10, cpu_soft_priority=10000); CREATE DROP RESOURCE GROUP rg_test_group; DROP + -- positive: concurrency should be in [0, max_connections] -CREATE RESOURCE GROUP rg_test_group WITH (concurrency=0, cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP rg_test_group WITH (concurrency=0, cpu_hard_quota_limit=10); CREATE DROP RESOURCE GROUP rg_test_group; DROP -CREATE RESOURCE GROUP rg_test_group WITH (concurrency=1, cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP rg_test_group WITH (concurrency=1, cpu_hard_quota_limit=10); CREATE DROP RESOURCE GROUP rg_test_group; DROP -CREATE RESOURCE GROUP rg_test_group WITH (concurrency=25, cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP rg_test_group WITH (concurrency=25, cpu_hard_quota_limit=10); CREATE DROP RESOURCE GROUP rg_test_group; DROP -CREATE RESOURCE GROUP rg1_test_group WITH (concurrency=1, cpu_rate_limit=10, memory_limit=10); -CREATE -CREATE RESOURCE GROUP rg2_test_group WITH (concurrency=1, cpu_rate_limit=50, memory_limit=50); +CREATE RESOURCE GROUP rg1_test_group WITH (concurrency=1, cpu_hard_quota_limit=10); CREATE +CREATE RESOURCE GROUP rg2_test_group WITH (concurrency=1, cpu_hard_quota_limit=500); +ERROR: cpu_hard_quota_limit range is [1, 100] or equals to -1 DROP RESOURCE GROUP rg1_test_group; DROP DROP RESOURCE GROUP rg2_test_group; -DROP -CREATE RESOURCE GROUP rg1_test_group WITH (concurrency=1, cpu_rate_limit=20, memory_limit=20); -CREATE -CREATE RESOURCE GROUP rg2_test_group WITH (concurrency=1, cpu_rate_limit=40, memory_limit=40); 
-CREATE -DROP RESOURCE GROUP rg1_test_group; -DROP -DROP RESOURCE GROUP rg2_test_group; -DROP -CREATE RESOURCE GROUP rg1_test_group WITH (concurrency=1, cpu_rate_limit=30, memory_limit=30); -CREATE -CREATE RESOURCE GROUP rg2_test_group WITH (concurrency=1, cpu_rate_limit=30, memory_limit=30); -CREATE -DROP RESOURCE GROUP rg1_test_group; -DROP -DROP RESOURCE GROUP rg2_test_group; -DROP --- positive: concurrency should be zero for cgroup audited resource group -CREATE RESOURCE GROUP rg_test_group WITH (concurrency=0, cpu_rate_limit=10, memory_limit=10, memory_auditor="cgroup"); -CREATE -DROP RESOURCE GROUP rg_test_group; -DROP - --- memory_spill_ratio range is [0, 100] --- no limit on the sum of memory_shared_quota and memory_spill_ratio -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_shared_quota=0, memory_spill_ratio=1); -CREATE -DROP RESOURCE GROUP rg_test_group; -DROP -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_shared_quota=50, memory_spill_ratio=50); -CREATE -DROP RESOURCE GROUP rg_test_group; -DROP -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_shared_quota=0, memory_spill_ratio=100); -CREATE -DROP RESOURCE GROUP rg_test_group; -DROP -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_shared_quota=99, memory_spill_ratio=1); -CREATE -DROP RESOURCE GROUP rg_test_group; -DROP - --- negative: memory_spill_ratio does not accept out of range percentage values -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_spill_ratio=-1); -ERROR: memory_spill_ratio range is [0, 100] -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_spill_ratio=101); -ERROR: memory_spill_ratio range is [0, 100] - --- negative: memory_spill_ratio does not accept string values -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_spill_ratio='0'); 
-ERROR: memory_spill_ratio requires a numeric value -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_spill_ratio='10'); -ERROR: memory_spill_ratio requires a numeric value - --- negative: memory_spill_ratio does not accept float values -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_spill_ratio=10.5); -ERROR: invalid input syntax for type bigint: "10.5" - --- negative: when memory_limit is unlimited memory_spill_ratio must be set to 0 -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=0, memory_spill_ratio=10); -ERROR: when memory_limit is unlimited memory_spill_ratio must be set to 0 - --- positive -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=0, memory_spill_ratio=0); -CREATE -DROP RESOURCE GROUP rg_test_group; -DROP +ERROR: resource group "rg2_test_group" does not exist -- ---------------------------------------------------------------------- -- Test: alter a resource group -- ---------------------------------------------------------------------- -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=5, memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=5); CREATE -- ALTER RESOURCE GROUP SET CONCURRENCY N @@ -477,170 +407,70 @@ ALTER ALTER RESOURCE GROUP rg_test_group SET CONCURRENCY 25; ALTER --- ALTER RESOURCE GROUP SET CPU_RATE_LIMIT VALUE --- negative: cpu_rate_limit & memory_limit should be in [1, 100] -ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT -0.1; +-- ALTER RESOURCE GROUP SET cpu_hard_quota_limit VALUE +-- negative: cpu_hard_quota_limit should be in [1, 100] +ALTER RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit -0.1; ERROR: syntax error at or near "0.1" -LINE 1: ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT -0.1; - ^ -ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT -1; -ERROR: cpu_rate_limit range is [1, 100] -ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT 
0; -ERROR: cpu_rate_limit range is [1, 100] -ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT 0.7; +LINE 1: ... RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit -0.1; + ^ +ALTER RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit -1; +ALTER +ALTER RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit 0; +ERROR: cpu_hard_quota_limit range is [1, 100] or equals to -1 +ALTER RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit 0.7; ERROR: syntax error at or near "0.7" -LINE 1: ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT 0.7; - ^ -ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT 1.7; +LINE 1: ...R RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit 0.7; + ^ +ALTER RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit 1.7; ERROR: syntax error at or near "1.7" -LINE 1: ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT 1.7; - ^ -ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT 61; -ERROR: total cpu_rate_limit exceeded the limit of 100 -ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT a; +LINE 1: ...R RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit 1.7; + ^ +ALTER RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit 61; +ALTER +ALTER RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit a; ERROR: syntax error at or near "a" -LINE 1: ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT a; - ^ -ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT 'abc'; +LINE 1: ...TER RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit a; + ^ +ALTER RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit 'abc'; ERROR: syntax error at or near "'abc'" -LINE 1: ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT 'abc'; - ^ -ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT 20%; +LINE 1: ...RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit 'abc'; + ^ +ALTER RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit 20%; ERROR: syntax error at or near "%" -LINE 1: ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT 20%; - ^ -ALTER 
RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT 0.2%; +LINE 1: ...R RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit 20%; + ^ +ALTER RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit 0.2%; ERROR: syntax error at or near "0.2" -LINE 1: ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT 0.2%; - ^ --- positive: cpu_rate_limit & memory_limit should be in [1, 100] -ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT 1; +LINE 1: ... RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit 0.2%; + ^ +-- positive: cpu_hard_quota_limit should be in [1, 100] +ALTER RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit 1; ALTER -ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT 2; +ALTER RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit 2; ALTER -ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT 60; +ALTER RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit 60; ALTER DROP RESOURCE GROUP rg_test_group; DROP --- positive: total cpu_rate_limit & memory_limit should be in [1, 100] -CREATE RESOURCE GROUP rg1_test_group WITH (cpu_rate_limit=10, memory_limit=10); -CREATE -CREATE RESOURCE GROUP rg2_test_group WITH (cpu_rate_limit=10, memory_limit=10); -CREATE -ALTER RESOURCE GROUP rg1_test_group SET CPU_RATE_LIMIT 50; -ALTER -ALTER RESOURCE GROUP rg1_test_group SET CPU_RATE_LIMIT 40; -ALTER -ALTER RESOURCE GROUP rg2_test_group SET CPU_RATE_LIMIT 20; -ALTER -ALTER RESOURCE GROUP rg1_test_group SET CPU_RATE_LIMIT 30; -ALTER -ALTER RESOURCE GROUP rg2_test_group SET CPU_RATE_LIMIT 30; -ALTER -DROP RESOURCE GROUP rg1_test_group; -DROP -DROP RESOURCE GROUP rg2_test_group; -DROP --- positive: cpuset and cpu_rate_limit are exclusive, --- if cpu_rate_limit is set, cpuset is empty + +-- positive: cpuset and cpu_hard_quota_limit are exclusive, +-- if cpu_hard_quota_limit is set, cpuset is empty -- if cpuset is set, cpuset is -1 -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP rg_test_group WITH 
(cpu_hard_quota_limit=10); CREATE ALTER RESOURCE GROUP rg_test_group SET CPUSET '0'; ALTER -SELECT groupname,cpu_rate_limit,memory_limit,cpuset FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; - groupname | cpu_rate_limit | memory_limit | cpuset ----------------+----------------+--------------+-------- - rg_test_group | -1 | 10 | 0 +SELECT groupname,cpu_hard_quota_limit,cpuset FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; + groupname | cpu_hard_quota_limit | cpuset +---------------+----------------------+-------- + rg_test_group | -1 | 0 (1 row) -ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT 10; +ALTER RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit 10; ALTER -SELECT groupname,cpu_rate_limit,memory_limit,cpuset FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; - groupname | cpu_rate_limit | memory_limit | cpuset ----------------+----------------+--------------+-------- - rg_test_group | 10 | 10 | -1 +SELECT groupname,cpu_hard_quota_limit,cpuset FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; + groupname | cpu_hard_quota_limit | cpuset +---------------+----------------------+-------- + rg_test_group | 10 | -1 (1 row) DROP RESOURCE GROUP rg_test_group; DROP - -CREATE RESOURCE GROUP cgroup_audited_group WITH (concurrency=0, cpu_rate_limit=10, memory_limit=10, memory_auditor="cgroup"); -CREATE --- negative: memory_auditor cannot be altered -ALTER RESOURCE GROUP cgroup_audited_group SET MEMORY_AUDITOR "default"; -ERROR: syntax error at or near "MEMORY_AUDITOR" -LINE 1: ALTER RESOURCE GROUP cgroup_audited_group SET MEMORY_AUDITOR... 
- ^ --- negative: concurrency should be zero for cgroup audited resource group -ALTER RESOURCE GROUP cgroup_audited_group SET CONCURRENCY 10; -ERROR: resource group concurrency must be 0 when group memory_auditor is cgroup --- negative: role should not be assigned to a cgroup audited resource group -CREATE ROLE cgroup_audited_role RESOURCE GROUP cgroup_audited_group; -ERROR: you cannot assign a role to this resource group -DETAIL: The memory_auditor property for this group is not the default. -DROP RESOURCE GROUP cgroup_audited_group; -DROP - --- positive: memory_spill_ratio accepts integer values -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_spill_ratio=20); -CREATE -ALTER RESOURCE GROUP rg_test_group SET memory_spill_ratio 10; -ALTER -DROP RESOURCE GROUP rg_test_group; -DROP - --- negative: memory_spill_ratio only accepts integer values -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_spill_ratio=20); -CREATE -ALTER RESOURCE GROUP rg_test_group SET memory_spill_ratio '10'; -ERROR: syntax error at or near "'10'" -LINE 1: ...ER RESOURCE GROUP rg_test_group SET memory_spill_ratio '10'; - ^ -ALTER RESOURCE GROUP rg_test_group SET memory_spill_ratio 10.5; -ERROR: syntax error at or near "10.5" -LINE 1: ...ER RESOURCE GROUP rg_test_group SET memory_spill_ratio 10.5; - ^ -DROP RESOURCE GROUP rg_test_group; -DROP - --- negative: memory_spill_ratio does not accept out of range values -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_spill_ratio=20); -CREATE -ALTER RESOURCE GROUP rg_test_group SET memory_spill_ratio -1; -ERROR: memory_spill_ratio range is [0, 100] -ALTER RESOURCE GROUP rg_test_group SET memory_spill_ratio 101; -ERROR: memory_spill_ratio range is [0, 100] -DROP RESOURCE GROUP rg_test_group; -DROP - --- positive: memory_limit can be altered to unlimited if memory_spill_ratio is 0 -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, 
memory_limit=10, memory_spill_ratio=0); -CREATE -ALTER RESOURCE GROUP rg_test_group SET memory_limit 0; -ALTER -DROP RESOURCE GROUP rg_test_group; -DROP - --- negative: memory_spill_ratio can only be set to 0 if memory_limit is unlimited -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=0, memory_spill_ratio=0); -CREATE -ALTER RESOURCE GROUP rg_test_group SET memory_spill_ratio 10; -ERROR: when memory_limit is unlimited memory_spill_ratio must be set to 0 -DROP RESOURCE GROUP rg_test_group; -DROP - --- positive: memory_spill_ratio accepts a percentage value only if --- memory_limit is limited -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_spill_ratio=0); -CREATE -ALTER RESOURCE GROUP rg_test_group SET memory_spill_ratio 10; -ALTER -DROP RESOURCE GROUP rg_test_group; -DROP - --- negative: memory_limit must be limited if memory_spill_ratio > 0 -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_spill_ratio=10); -CREATE -ALTER RESOURCE GROUP rg_test_group SET memory_limit 0; -ERROR: when memory_limit is unlimited memory_spill_ratio must be set to 0 -DROP RESOURCE GROUP rg_test_group; -DROP diff --git a/src/test/isolation2/expected/resgroup/resgroup_transaction.out b/src/test/isolation2/expected/resgroup/resgroup_transaction.out index 753ce211cbd..01236afc7d2 100644 --- a/src/test/isolation2/expected/resgroup/resgroup_transaction.out +++ b/src/test/isolation2/expected/resgroup/resgroup_transaction.out @@ -8,7 +8,7 @@ ERROR: resource group "rg_test_group" does not exist --end_ignore -- helper view to check the resgroup status -CREATE OR REPLACE VIEW rg_test_monitor AS SELECT groupname, concurrency, cpu_rate_limit FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; +CREATE OR REPLACE VIEW rg_test_monitor AS SELECT groupname, concurrency, cpu_hard_quota_limit FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; CREATE -- 
---------------------------------------------------------------------- @@ -18,17 +18,17 @@ CREATE -- CREATE RESOURCE GROUP cannot run inside a transaction block BEGIN; BEGIN -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=5, memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=5); ERROR: CREATE RESOURCE GROUP cannot run inside a transaction block END; END SELECT * FROM rg_test_monitor; - groupname | concurrency | cpu_rate_limit ------------+-------------+---------------- + groupname | concurrency | cpu_hard_quota_limit +-----------+-------------+---------------------- (0 rows) -- ALTER RESOURCE GROUP cannot run inside a transaction block -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=5, memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=5); CREATE BEGIN; BEGIN @@ -37,9 +37,9 @@ ERROR: ALTER RESOURCE GROUP cannot run inside a transaction block END; END SELECT * FROM rg_test_monitor; - groupname | concurrency | cpu_rate_limit ----------------+-------------+---------------- - rg_test_group | 20 | 5 + groupname | concurrency | cpu_hard_quota_limit +---------------+-------------+---------------------- + rg_test_group | 20 | 5 (1 row) -- DROP RESOURCE GROUP cannot run inside a transaction block @@ -50,9 +50,9 @@ ERROR: DROP RESOURCE GROUP cannot run inside a transaction block END; END SELECT * FROM rg_test_monitor; - groupname | concurrency | cpu_rate_limit ----------------+-------------+---------------- - rg_test_group | 20 | 5 + groupname | concurrency | cpu_hard_quota_limit +---------------+-------------+---------------------- + rg_test_group | 20 | 5 (1 row) DROP RESOURCE GROUP rg_test_group; @@ -71,17 +71,17 @@ SELECT 1; ---------- 1 (1 row) -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=5, memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=5); ERROR: CREATE RESOURCE GROUP cannot run inside a transaction block END; END SELECT * FROM 
rg_test_monitor; - groupname | concurrency | cpu_rate_limit ------------+-------------+---------------- + groupname | concurrency | cpu_hard_quota_limit +-----------+-------------+---------------------- (0 rows) -- ALTER RESOURCE GROUP cannot run inside a transaction block -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=5, memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=5); CREATE BEGIN; BEGIN @@ -95,9 +95,9 @@ ERROR: ALTER RESOURCE GROUP cannot run inside a transaction block END; END SELECT * FROM rg_test_monitor; - groupname | concurrency | cpu_rate_limit ----------------+-------------+---------------- - rg_test_group | 20 | 5 + groupname | concurrency | cpu_hard_quota_limit +---------------+-------------+---------------------- + rg_test_group | 20 | 5 (1 row) -- DROP RESOURCE GROUP cannot run inside a transaction block @@ -113,9 +113,9 @@ ERROR: DROP RESOURCE GROUP cannot run inside a transaction block END; END SELECT * FROM rg_test_monitor; - groupname | concurrency | cpu_rate_limit ----------------+-------------+---------------- - rg_test_group | 20 | 5 + groupname | concurrency | cpu_hard_quota_limit +---------------+-------------+---------------------- + rg_test_group | 20 | 5 (1 row) DROP RESOURCE GROUP rg_test_group; @@ -131,19 +131,19 @@ BEGIN; BEGIN SAVEPOINT rg_savepoint; SAVEPOINT -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=5, memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=5); ERROR: CREATE RESOURCE GROUP cannot run inside a transaction block ROLLBACK TO SAVEPOINT rg_savepoint; ROLLBACK ABORT; ABORT SELECT * FROM rg_test_monitor; - groupname | concurrency | cpu_rate_limit ------------+-------------+---------------- + groupname | concurrency | cpu_hard_quota_limit +-----------+-------------+---------------------- (0 rows) -- ALTER RESOURCE GROUP cannot run inside a subtransaction -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=5, memory_limit=5); +CREATE 
RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=5); CREATE BEGIN; BEGIN @@ -156,9 +156,9 @@ ROLLBACK ABORT; ABORT SELECT * FROM rg_test_monitor; - groupname | concurrency | cpu_rate_limit ----------------+-------------+---------------- - rg_test_group | 20 | 5 + groupname | concurrency | cpu_hard_quota_limit +---------------+-------------+---------------------- + rg_test_group | 20 | 5 (1 row) -- DROP RESOURCE GROUP cannot run inside a subtransaction @@ -173,9 +173,9 @@ ROLLBACK ABORT; ABORT SELECT * FROM rg_test_monitor; - groupname | concurrency | cpu_rate_limit ----------------+-------------+---------------- - rg_test_group | 20 | 5 + groupname | concurrency | cpu_hard_quota_limit +---------------+-------------+---------------------- + rg_test_group | 20 | 5 (1 row) DROP RESOURCE GROUP rg_test_group; @@ -185,7 +185,7 @@ DROP -- Test: create/alter/drop a resource group in function call -- ---------------------------------------------------------------------- -CREATE OR REPLACE FUNCTION rg_create_func() RETURNS VOID AS $$ CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=5, memory_limit=5) $$ LANGUAGE SQL; +CREATE OR REPLACE FUNCTION rg_create_func() RETURNS VOID AS $$ CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=5) $$ LANGUAGE SQL; CREATE CREATE OR REPLACE FUNCTION rg_alter_func() RETURNS VOID AS $$ ALTER RESOURCE GROUP rg_test_group SET CONCURRENCY 10 $$ LANGUAGE SQL; @@ -199,20 +199,20 @@ SELECT * FROM rg_create_func(); ERROR: CREATE RESOURCE GROUP cannot be executed from a function CONTEXT: SQL function "rg_create_func" statement 1 SELECT * FROM rg_test_monitor; - groupname | concurrency | cpu_rate_limit ------------+-------------+---------------- + groupname | concurrency | cpu_hard_quota_limit +-----------+-------------+---------------------- (0 rows) -- ALTER RESOURCE GROUP cannot run inside a function call -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=5, memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH 
(cpu_hard_quota_limit=5); CREATE SELECT * FROM rg_alter_func(); ERROR: ALTER RESOURCE GROUP cannot be executed from a function CONTEXT: SQL function "rg_alter_func" statement 1 SELECT * FROM rg_test_monitor; - groupname | concurrency | cpu_rate_limit ----------------+-------------+---------------- - rg_test_group | 20 | 5 + groupname | concurrency | cpu_hard_quota_limit +---------------+-------------+---------------------- + rg_test_group | 20 | 5 (1 row) -- DROP RESOURCE GROUP cannot run inside a function call @@ -220,9 +220,9 @@ SELECT * FROM rg_drop_func(); ERROR: DROP RESOURCE GROUP cannot be executed from a function CONTEXT: SQL function "rg_drop_func" statement 1 SELECT * FROM rg_test_monitor; - groupname | concurrency | cpu_rate_limit ----------------+-------------+---------------- - rg_test_group | 20 | 5 + groupname | concurrency | cpu_hard_quota_limit +---------------+-------------+---------------------- + rg_test_group | 20 | 5 (1 row) DROP RESOURCE GROUP rg_test_group; diff --git a/src/test/isolation2/expected/resgroup/resgroup_unassign_entrydb.out b/src/test/isolation2/expected/resgroup/resgroup_unassign_entrydb.out index ce274ff2ad2..a7d5b66a1cf 100644 --- a/src/test/isolation2/expected/resgroup/resgroup_unassign_entrydb.out +++ b/src/test/isolation2/expected/resgroup/resgroup_unassign_entrydb.out @@ -10,7 +10,7 @@ DROP RESOURCE GROUP rg_test; ERROR: resource group "rg_test" does not exist -- end_ignore -CREATE RESOURCE GROUP rg_test WITH (concurrency=2, cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP rg_test WITH (concurrency=2, cpu_hard_quota_limit=10); CREATE CREATE ROLE role_test RESOURCE GROUP rg_test; CREATE diff --git a/src/test/isolation2/expected/resgroup/resgroup_unlimit_memory_spill_ratio.out b/src/test/isolation2/expected/resgroup/resgroup_unlimit_memory_spill_ratio.out deleted file mode 100644 index 67370ff8a6b..00000000000 --- a/src/test/isolation2/expected/resgroup/resgroup_unlimit_memory_spill_ratio.out +++ /dev/null @@ 
-1,155 +0,0 @@ --- start_ignore -DROP RESOURCE GROUP rg_spill_test; -ERROR: resource group "rg_spill_test" does not exist --- end_ignore - --- create -CREATE RESOURCE GROUP rg_spill_test WITH (concurrency=10, cpu_rate_limit=20, memory_limit=20, memory_shared_quota=50, memory_spill_ratio=60); -CREATE -DROP RESOURCE GROUP rg_spill_test; -DROP - -CREATE RESOURCE GROUP rg_spill_test WITH (concurrency=10, cpu_rate_limit=20, memory_limit=20, memory_shared_quota=50, memory_spill_ratio=0); -CREATE -DROP RESOURCE GROUP rg_spill_test; -DROP - -CREATE RESOURCE GROUP rg_spill_test WITH (concurrency=10, cpu_rate_limit=20, memory_limit=20, memory_shared_quota=50, memory_spill_ratio=100); -CREATE -DROP RESOURCE GROUP rg_spill_test; -DROP - -CREATE RESOURCE GROUP rg_spill_test WITH (concurrency=10, cpu_rate_limit=20, memory_limit=20, memory_shared_quota=50, memory_spill_ratio=-1); -ERROR: memory_spill_ratio range is [0, 100] -DROP RESOURCE GROUP rg_spill_test; -ERROR: resource group "rg_spill_test" does not exist - -CREATE RESOURCE GROUP rg_spill_test WITH (concurrency=10, cpu_rate_limit=20, memory_limit=20, memory_shared_quota=50, memory_spill_ratio=101); -ERROR: memory_spill_ratio range is [0, 100] -DROP RESOURCE GROUP rg_spill_test; -ERROR: resource group "rg_spill_test" does not exist - --- alter -CREATE RESOURCE GROUP rg_spill_test WITH (concurrency=10, cpu_rate_limit=20, memory_limit=20, memory_shared_quota=50, memory_spill_ratio=20); -CREATE - -ALTER RESOURCE GROUP rg_spill_test SET MEMORY_SPILL_RATIO 60; -ALTER -ALTER RESOURCE GROUP rg_spill_test SET MEMORY_SPILL_RATIO 0; -ALTER -ALTER RESOURCE GROUP rg_spill_test SET MEMORY_SPILL_RATIO 100; -ALTER -ALTER RESOURCE GROUP rg_spill_test SET MEMORY_SPILL_RATIO -1; -ERROR: memory_spill_ratio range is [0, 100] -ALTER RESOURCE GROUP rg_spill_test SET MEMORY_SPILL_RATIO 101; -ERROR: memory_spill_ratio range is [0, 100] - -DROP RESOURCE GROUP rg_spill_test; -DROP - --- set GUC -CREATE RESOURCE GROUP rg_spill_test WITH 
(concurrency=10, cpu_rate_limit=20, memory_limit=20, memory_shared_quota=50, memory_spill_ratio=20); -CREATE - -SET MEMORY_SPILL_RATIO TO 60; -SET -SHOW MEMORY_SPILL_RATIO; - memory_spill_ratio --------------------- - 60 -(1 row) -SELECT 1; - ?column? ----------- - 1 -(1 row) - -SET MEMORY_SPILL_RATIO TO 0; -SET -SHOW MEMORY_SPILL_RATIO; - memory_spill_ratio --------------------- - 0 -(1 row) -SELECT 1; - ?column? ----------- - 1 -(1 row) - -SET MEMORY_SPILL_RATIO TO 100; -SET -SHOW MEMORY_SPILL_RATIO; - memory_spill_ratio --------------------- - 100 -(1 row) -SELECT 1; - ?column? ----------- - 1 -(1 row) - -SET MEMORY_SPILL_RATIO TO -1; -ERROR: -1 is outside the valid range for parameter "memory_spill_ratio" (0 .. 100) -SHOW MEMORY_SPILL_RATIO; - memory_spill_ratio --------------------- - 100 -(1 row) -SELECT 1; - ?column? ----------- - 1 -(1 row) - -SET MEMORY_SPILL_RATIO TO 101; -ERROR: 101 is outside the valid range for parameter "memory_spill_ratio" (0 .. 100) -SHOW MEMORY_SPILL_RATIO; - memory_spill_ratio --------------------- - 100 -(1 row) -SELECT 1; - ?column? ----------- - 1 -(1 row) - -DROP RESOURCE GROUP rg_spill_test; -DROP - --- test case for query_mem=0 -CREATE TABLE test_zero_workmem(c int); -CREATE - ---This test intends to build a situation that query_mem = 0 ---and verify under such condition work_mem will be used. 
-CREATE RESOURCE GROUP rg_zero_workmem WITH (concurrency=2, cpu_rate_limit=10, memory_limit=20, memory_shared_quota=20, memory_spill_ratio=0); -CREATE - -CREATE ROLE role_zero_workmem SUPERUSER RESOURCE GROUP rg_zero_workmem; -CREATE -SET ROLE TO role_zero_workmem; -SET - ---test query that will use spi -ANALYZE test_zero_workmem; -ANALYZE - ---test normal DML -SELECT count(*) FROM test_zero_workmem; - count -------- - 0 -(1 row) - ---clean env -RESET ROLE; -RESET -DROP TABLE test_zero_workmem; -DROP -DROP ROLE role_zero_workmem; -DROP -DROP RESOURCE GROUP rg_zero_workmem; -DROP diff --git a/src/test/isolation2/expected/resgroup/resgroup_views.out b/src/test/isolation2/expected/resgroup/resgroup_views.out index a31d5524b6d..770df52edda 100644 --- a/src/test/isolation2/expected/resgroup/resgroup_views.out +++ b/src/test/isolation2/expected/resgroup/resgroup_views.out @@ -1,25 +1,25 @@ select * from gp_toolkit.gp_resgroup_config where groupname='default_group'; - groupid | groupname | concurrency | cpu_rate_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ----------+---------------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- - 6437 | default_group | 20 | 20 | 30 | 80 | 10 | vmtracker | -1 + groupid | groupname | concurrency | cpu_hard_quota_limit | cpu_soft_priority | cpuset +---------+---------------+-------------+----------------------+-------------------+-------- + 6437 | default_group | 20 | 20 | 100 | -1 (1 row) -select rsgname , groupid , num_running , num_queueing , num_queued , num_executed , cpu_usage->'-1' as qd_cpu_usage , memory_usage->'-1'->'used' as qd_memory_used , memory_usage->'-1'->'shared_used' as qd_memory_shared_used from gp_toolkit.gp_resgroup_status where rsgname='default_group'; - rsgname | groupid | num_running | num_queueing | num_queued | num_executed | qd_cpu_usage | qd_memory_used | qd_memory_shared_used 
----------------+---------+-------------+--------------+------------+--------------+--------------+----------------+----------------------- - default_group | 6437 | 0 | 0 | 0 | 0 | 0 | 0 | 0 +select rsgname , groupid , num_running , num_queueing , num_queued , num_executed , cpu_usage->'-1' as qd_cpu_usage from gp_toolkit.gp_resgroup_status where rsgname='default_group'; + rsgname | groupid | num_running | num_queueing | num_queued | num_executed | qd_cpu_usage +---------------+---------+-------------+--------------+------------+--------------+-------------- + default_group | 6437 | 0 | 0 | 0 | 0 | 0 (1 row) -select rsgname , groupid , cpu , memory_used , memory_shared_used from gp_toolkit.gp_resgroup_status_per_host s join gp_segment_configuration c on s.hostname=c.hostname and c.content=-1 and role='p' where rsgname='default_group'; - rsgname | groupid | cpu | memory_used | memory_shared_used ----------------+---------+------+-------------+-------------------- - default_group | 6437 | 0.00 | 0 | 0 +select rsgname , groupid , cpu from gp_toolkit.gp_resgroup_status_per_host s join gp_segment_configuration c on s.hostname=c.hostname and c.content=-1 and role='p' where rsgname='default_group'; + rsgname | groupid | cpu +---------------+---------+------ + default_group | 6437 | 0.00 (1 row) -select rsgname , groupid , segment_id , cpu , memory_used , memory_shared_used from gp_toolkit.gp_resgroup_status_per_segment where rsgname='default_group' and segment_id=-1; - rsgname | groupid | segment_id | cpu | memory_used | memory_shared_used ----------------+---------+------------+------+-------------+-------------------- - default_group | 6437 | -1 | 0.00 | 0 | 0 +select rsgname , groupid , segment_id , cpu from gp_toolkit.gp_resgroup_status_per_segment where rsgname='default_group' and segment_id=-1; + rsgname | groupid | segment_id | cpu +---------------+---------+------------+------ + default_group | 6437 | -1 | 0.00 (1 row) select * from gp_toolkit.gp_resgroup_role 
where rrrolname='gpadmin'; @@ -33,40 +33,40 @@ select * from gp_toolkit.gp_resgroup_role where rrrolname='gpadmin'; -- start_ignore select * from gp_toolkit.gp_resgroup_config; - groupid | groupname | concurrency | cpu_rate_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ----------+---------------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- - 6437 | default_group | 20 | 20 | 30 | 80 | 10 | vmtracker | -1 - 6441 | system_group | -1 | 10 | 0 | 0 | 0 | vmtracker | -1 - 6438 | admin_group | 2 | 10 | 10 | 80 | 10 | vmtracker | -1 + groupid | groupname | concurrency | cpu_hard_quota_limit | cpu_soft_priority | cpuset +---------+---------------+-------------+----------------------+-------------------+-------- + 6437 | default_group | 20 | 20 | 100 | -1 + 6441 | system_group | 0 | 10 | 100 | -1 + 6438 | admin_group | 2 | 10 | 100 | -1 (3 rows) select * from gp_toolkit.gp_resgroup_status; - rsgname | groupid | num_running | num_queueing | num_queued | num_executed | total_queue_duration | cpu_usage | memory_usage 
----------------+---------+-------------+--------------+------------+--------------+----------------------+-----------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - default_group | 6437 | 0 | 0 | 0 | 0 | @ 0 | {'-1': 0.0, '0': 0.0, '1': 0.0, '2': 0.0} | {'-1': {'used': 0, 'available': 204, 'quota_used': 0, 'quota_available': 40, 'quota_granted': 40, 'quota_proposed': 40, 'shared_used': 0, 'shared_available': 164, 'shared_granted': 164, 'shared_proposed': 164}, '0': {'used': 0, 'available': 204, 'quota_used': 0, 'quota_available': 40, 'quota_granted': 40, 'quota_proposed': 40, 'shared_used': 0, 'shared_available': 164, 'shared_granted': 164, 'shared_proposed': 164}, '1': {'used': 0, 'available': 204, 'quota_used': 0, 'quota_available': 40, 'quota_granted': 40, 'quota_proposed': 40, 'shared_used': 0, 'shared_available': 164, 'shared_granted': 164, 'shared_proposed': 164}, '2': {'used': 0, 'available': 204, 'quota_used': 0, 'quota_available': 40, 'quota_granted': 40, 'quota_proposed': 40, 'shared_used': 0, 'shared_available': 164, 'shared_granted': 164, 'shared_proposed': 164}} - admin_group | 6438 | 1 | 0 | 0 | 19 | @ 0 | {'-1': 0.31, '0': 0.11, '1': 
0.08, '2': 0.08} | {'-1': {'used': 0, 'available': 68, 'quota_used': 6, 'quota_available': 6, 'quota_granted': 12, 'quota_proposed': 12, 'shared_used': 0, 'shared_available': 56, 'shared_granted': 56, 'shared_proposed': 56}, '0': {'used': 0, 'available': 68, 'quota_used': 6, 'quota_available': 6, 'quota_granted': 12, 'quota_proposed': 12, 'shared_used': 0, 'shared_available': 56, 'shared_granted': 56, 'shared_proposed': 56}, '1': {'used': 0, 'available': 68, 'quota_used': 6, 'quota_available': 6, 'quota_granted': 12, 'quota_proposed': 12, 'shared_used': 0, 'shared_available': 56, 'shared_granted': 56, 'shared_proposed': 56}, '2': {'used': 0, 'available': 68, 'quota_used': 6, 'quota_available': 6, 'quota_granted': 12, 'quota_proposed': 12, 'shared_used': 0, 'shared_available': 56, 'shared_granted': 56, 'shared_proposed': 56}} - system_group | 6441 | 0 | 0 | 0 | 0 | @ 0 | {'-1': 0.08, '0': 0.07, '1': 0.07, '2': 0.07} | {'-1': {'used': 0, 'available': 0, 'quota_used': 0, 'quota_available': 0, 'quota_granted': 0, 'quota_proposed': 0, 'shared_used': 0, 'shared_available': 0, 'shared_granted': 0, 'shared_proposed': 0}, '0': {'used': 0, 'available': 0, 'quota_used': 0, 'quota_available': 0, 'quota_granted': 0, 'quota_proposed': 0, 'shared_used': 0, 'shared_available': 0, 'shared_granted': 0, 'shared_proposed': 0}, '1': {'used': 0, 'available': 0, 'quota_used': 0, 'quota_available': 0, 'quota_granted': 0, 'quota_proposed': 0, 'shared_used': 0, 'shared_available': 0, 'shared_granted': 0, 'shared_proposed': 0}, '2': {'used': 0, 'available': 0, 'quota_used': 0, 'quota_available': 0, 'quota_granted': 0, 'quota_proposed': 0, 'shared_used': 0, 'shared_available': 0, 'shared_granted': 0, 'shared_proposed': 0}} + rsgname | groupid | num_running | num_queueing | num_queued | num_executed | total_queue_duration | cpu_usage +---------------+---------+-------------+--------------+------------+--------------+----------------------+----------------------------------------------- + 
default_group | 6437 | 0 | 0 | 0 | 0 | @ 0 | {'-1': 0.0, '0': 0.0, '1': 0.0, '2': 0.0} + admin_group | 6438 | 1 | 0 | 0 | 13 | @ 0 | {'-1': 0.33, '0': 0.12, '1': 0.09, '2': 0.05} + system_group | 6441 | 0 | 0 | 0 | 0 | @ 0 | {'-1': 0.09, '0': 0.08, '1': 0.08, '2': 0.08} (3 rows) select * from gp_toolkit.gp_resgroup_status_per_host; - rsgname | groupid | hostname | cpu | memory_used | memory_available | memory_quota_used | memory_quota_available | memory_shared_used | memory_shared_available ----------------+---------+----------+------+-------------+------------------+-------------------+------------------------+--------------------+------------------------- - admin_group | 6438 | zero | 0.14 | 1 | 271 | 24 | 24 | 0 | 224 - default_group | 6437 | zero | 0.00 | 0 | 816 | 0 | 160 | 0 | 656 - system_group | 6441 | zero | 0.08 | 0 | 0 | 0 | 0 | 0 | 0 + rsgname | groupid | hostname | cpu +---------------+---------+----------+------ + admin_group | 6438 | zero | 0.15 + default_group | 6437 | zero | 0.00 + system_group | 6441 | zero | 0.05 (3 rows) select * from gp_toolkit.gp_resgroup_status_per_segment; - rsgname | groupid | hostname | segment_id | cpu | memory_used | memory_available | memory_quota_used | memory_quota_available | memory_shared_used | memory_shared_available ----------------+---------+----------+------------+------+-------------+------------------+-------------------+------------------------+--------------------+------------------------- - admin_group | 6438 | zero | -1 | 0.25 | 1 | 67 | 6 | 6 | 0 | 56 - admin_group | 6438 | zero | 0 | 0.13 | 0 | 68 | 6 | 6 | 0 | 56 - admin_group | 6438 | zero | 1 | 0.10 | 0 | 68 | 6 | 6 | 0 | 56 - admin_group | 6438 | zero | 2 | 0.10 | 0 | 68 | 6 | 6 | 0 | 56 - default_group | 6437 | zero | -1 | 0.00 | 0 | 204 | 0 | 40 | 0 | 164 - default_group | 6437 | zero | 0 | 0.00 | 0 | 204 | 0 | 40 | 0 | 164 - default_group | 6437 | zero | 1 | 0.00 | 0 | 204 | 0 | 40 | 0 | 164 - default_group | 6437 | zero | 2 | 0.00 | 0 | 204 | 0 
| 40 | 0 | 164 - system_group | 6441 | zero | -1 | 0.08 | 0 | 0 | 0 | 0 | 0 | 0 - system_group | 6441 | zero | 0 | 0.06 | 0 | 0 | 0 | 0 | 0 | 0 - system_group | 6441 | zero | 1 | 0.06 | 0 | 0 | 0 | 0 | 0 | 0 - system_group | 6441 | zero | 2 | 0.06 | 0 | 0 | 0 | 0 | 0 | 0 + rsgname | groupid | hostname | segment_id | cpu +---------------+---------+----------+------------+------ + admin_group | 6438 | zero | -1 | 0.25 + admin_group | 6438 | zero | 0 | 0.11 + admin_group | 6438 | zero | 1 | 0.11 + admin_group | 6438 | zero | 2 | 0.07 + default_group | 6437 | zero | -1 | 0.00 + default_group | 6437 | zero | 0 | 0.00 + default_group | 6437 | zero | 1 | 0.00 + default_group | 6437 | zero | 2 | 0.00 + system_group | 6441 | zero | -1 | 0.06 + system_group | 6441 | zero | 0 | 0.05 + system_group | 6441 | zero | 1 | 0.05 + system_group | 6441 | zero | 2 | 0.05 (12 rows) -- end_ignore diff --git a/src/test/isolation2/input/resgroup/disable_resgroup.source b/src/test/isolation2/input/resgroup/disable_resgroup.source index 4578aa7c11d..df0084a2887 100644 --- a/src/test/isolation2/input/resgroup/disable_resgroup.source +++ b/src/test/isolation2/input/resgroup/disable_resgroup.source @@ -5,7 +5,6 @@ -- reset the GUC and restart cluster. -- start_ignore ! gpconfig -r gp_resource_manager; -! gpconfig -r gp_resource_group_memory_limit; ! 
gpstop -rai; -- end_ignore @@ -13,10 +12,4 @@ SHOW gp_resource_manager; -- reset settings ALTER RESOURCE GROUP admin_group SET concurrency 10; -ALTER RESOURCE GROUP admin_group SET memory_spill_ratio 0; -ALTER RESOURCE GROUP admin_group SET memory_limit 10; -ALTER RESOURCE GROUP admin_group SET memory_shared_quota 80; -ALTER RESOURCE GROUP default_group SET concurrency 20; -ALTER RESOURCE GROUP default_group SET memory_spill_ratio 0; -ALTER RESOURCE GROUP default_group SET memory_limit 0; -ALTER RESOURCE GROUP default_group SET memory_shared_quota 80; +ALTER RESOURCE GROUP default_group SET concurrency 20; \ No newline at end of file diff --git a/src/test/isolation2/input/resgroup/enable_resgroup.source b/src/test/isolation2/input/resgroup/enable_resgroup.source index b6effde64f9..024235e8d24 100644 --- a/src/test/isolation2/input/resgroup/enable_resgroup.source +++ b/src/test/isolation2/input/resgroup/enable_resgroup.source @@ -8,43 +8,9 @@ CREATE LANGUAGE plpython3u; ! mkdir @cgroup_mnt_point@/cpuset/gpdb; -- end_ignore --- we want to simulate a 3-segment (both master and primary) cluster with 2GB --- memory and gp_resource_group_memory_limit=100%, suppose: --- --- - total: the total memory on the system; --- - nsegs: the max per-host segment count (including both master and primaries); --- - limit: the gp_resource_group_memory_limit used for the simulation; --- --- then we have: total * limit / nsegs = 2GB * 1.0 / 3 --- so: limit = 2GB * 1.0 / 3 * nsegs / total --- --- with the simulation each primary segment should manage 682MB memory. -DO LANGUAGE plpython3u $$ - import os - import psutil - - mem = psutil.virtual_memory().total - swap = psutil.swap_memory().total - overcommit = int(open('/proc/sys/vm/overcommit_ratio').readline()) - total = swap + mem * overcommit / 100. 
- - nsegs = int(plpy.execute(''' - SELECT count(hostname) as nsegs - FROM gp_segment_configuration - WHERE preferred_role = 'p' - GROUP BY hostname - ORDER BY count(hostname) DESC - LIMIT 1 - ''')[0]['nsegs']) - - limit = (2 << 30) * 1.0 * nsegs / 3 / total - os.system('gpconfig -c gp_resource_group_memory_limit -v {:f}'.format(limit)) -$$; - -- enable resource group and restart cluster. -- start_ignore ! gpconfig -c gp_resource_manager -v group; -! gpconfig -c gp_resource_group_cpu_limit -v 0.9; ! gpconfig -c max_connections -v 250 -m 25; ! gpconfig -c runaway_detector_activation_percent -v 100; ! gpstop -rai; @@ -66,12 +32,3 @@ $$; -- very small memory quota for each resgroup slot, correct it. 0: ALTER RESOURCE GROUP admin_group SET concurrency 2; --- explicitly set memory settings -0: ALTER RESOURCE GROUP admin_group SET memory_limit 10; -0: ALTER RESOURCE GROUP default_group SET memory_limit 30; -0: ALTER RESOURCE GROUP admin_group SET memory_shared_quota 80; -0: ALTER RESOURCE GROUP default_group SET memory_shared_quota 80; --- in later cases we will SHOW memory_spill_ratio as first command --- to verify that it can be correctly loaded even for bypassed commands -0: ALTER RESOURCE GROUP admin_group SET memory_spill_ratio 10; -0: ALTER RESOURCE GROUP default_group SET memory_spill_ratio 10; diff --git a/src/test/isolation2/input/resgroup/resgroup_alter_memory.source b/src/test/isolation2/input/resgroup/resgroup_alter_memory.source deleted file mode 100644 index e4ccf71db93..00000000000 --- a/src/test/isolation2/input/resgroup/resgroup_alter_memory.source +++ /dev/null @@ -1,545 +0,0 @@ --- start_ignore -DROP ROLE IF EXISTS role1_memory_test; -DROP ROLE IF EXISTS role2_memory_test; -DROP RESOURCE GROUP rg1_memory_test; -DROP RESOURCE GROUP rg2_memory_test; --- end_ignore - -CREATE OR REPLACE FUNCTION resGroupPalloc(float) RETURNS int AS -'@abs_builddir@/../regress/regress@DLSUFFIX@', 'resGroupPalloc' -LANGUAGE C READS SQL DATA; - -CREATE OR REPLACE FUNCTION 
hold_memory_by_percent(int, float) RETURNS int AS $$ - SELECT * FROM resGroupPalloc($2) -$$ LANGUAGE sql; - -CREATE OR REPLACE FUNCTION hold_memory_by_percent_on_qe(int, float) RETURNS int AS $$ - SELECT resGroupPalloc($2) FROM gp_dist_random('gp_id') -$$ LANGUAGE sql; - --- After a 'q' command the client connection is disconnected but the --- QD may still be alive, if we then query pg_stat_activity quick enough --- we might still see this session with query ''. --- A filter is put to filter out this kind of quitted sessions. -CREATE OR REPLACE VIEW rg_activity_status AS - SELECT rsgname, wait_event_type, state, query - FROM pg_stat_activity - WHERE rsgname in ('rg1_memory_test', 'rg2_memory_test') - AND query <> '' - ORDER BY sess_id; - -CREATE OR REPLACE VIEW rg_mem_status AS - SELECT groupname, memory_limit, memory_shared_quota - FROM gp_toolkit.gp_resgroup_config - WHERE groupname='rg1_memory_test' OR groupname='rg2_memory_test' - ORDER BY groupid; - -CREATE RESOURCE GROUP rg1_memory_test - WITH (concurrency=2, cpu_rate_limit=10, - memory_limit=60, memory_shared_quota=0, memory_spill_ratio=5); -CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; - --- --- 1.1) alter memory shared quota with low memory usage --- - -ALTER RESOURCE GROUP rg1_memory_test SET CONCURRENCY 2; -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 60; -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 50; - -SELECT * FROM rg_mem_status; - -1: SET ROLE TO role1_memory_test; -1: BEGIN; -1: SELECT hold_memory_by_percent(1,0.1); --- proc 1 gets a quota of 60%*50%/2=15% --- it has consumed 60%*10%=6% --- the group has 60%*50%-15%=15% free quota and 60%*50%=30% free shared quota - -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 20; - --- now the group has 60%*80%-15%=33% free quota and 60%*20%=12% free shared quota, --- so memory_shared_quota shall be the new value. 
-SELECT * FROM rg_mem_status; - -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 70; - --- now the group has 60%*30%-15%=3% free quota and 60%*70%=42% free shared quota, --- so memory_shared_quota shall be the new value. -SELECT * FROM rg_mem_status; - --- --- 1.2) alter memory shared quota with high memory usage --- - -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 80; - --- now the group has 60%*20%-15%=-3% free quota and 60%*80%=48% free shared quota, --- so memory_shared_quota shall be the old value. -SELECT * FROM rg_mem_status; - -1q: - --- --- 1.3) alter memory shared quota up and down --- - -ALTER RESOURCE GROUP rg1_memory_test SET CONCURRENCY 2; -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 40; -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 60; - -SELECT * FROM rg_mem_status; - -1: SET ROLE TO role1_memory_test; -1: BEGIN; --- proc1 has a quota of 40%*40%/2=8% --- rg1 still have 8% free quota - -ALTER RESOURCE GROUP rg1_memory_test SET CONCURRENCY 4; -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 70; --- rg1 should free some quota, 40%*40%/2*1-40%*30%/4*3=8%-9%=-1% --- rg1 now have 40%*20%=8% free quota --- each slot in rg1 requires 40%*30%/4=3% - -2: SET ROLE TO role1_memory_test; -2: BEGIN; -3: SET ROLE TO role1_memory_test; -3: BEGIN; --- proc2&proc3 each requires a quota of 40%*30%/4=3% --- rg1 now has 8%-3%*2=2% free quota - -4: SET ROLE TO role1_memory_test; -4&: BEGIN; --- proc4 shall be pending - -SELECT * FROM rg_mem_status; -SELECT * FROM rg_activity_status; - -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 40; --- rg1 now have 40%*60%-8%-3%*2=10% free quota again --- and now proc4 requires a quota of 40%*60%/4=6%, --- so it shall be waken up - -4<: -SELECT * FROM rg_mem_status; -SELECT * FROM rg_activity_status; - -1q: -2q: -3q: -4q: - --- --- 2.1) alter memory limit with low memory usage (and low memory shared usage) --- - -ALTER RESOURCE GROUP rg1_memory_test SET 
CONCURRENCY 2; -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 50; -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 60; - -SELECT * FROM rg_mem_status; - -1: SET ROLE TO role1_memory_test; -1: BEGIN; -1: SELECT hold_memory_by_percent(1,0.1); --- proc 1 gets a quota of 50%*40%/2=10% --- it has consumed 50%*10%=5% --- the group has 50%*40%-10%=10% free quota and 50%*60%=30% free shared quota - -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 60; - --- now the group has 60%*40%-10%=14% free quota and 60%*60%=36% free shared quota, --- so memory_limit can be the new value, however at the moment we don't update --- value when increasing memory_limit, so it's still the old value. -SELECT * FROM rg_mem_status; - -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 40; - --- now the group has 40%*40%-10%=6% free quota and 40%*60%=24% free shared quota, --- so memory_limit shall be the new value. -SELECT * FROM rg_mem_status; - --- --- 2.2) alter memory limit with high memory usage and low memory shared usage --- - -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 10; - --- now the group has 10%*40%-10%=-6% free quota and 10%*60%=6% free shared quota, --- so memory_limit shall be the old value. 
-SELECT * FROM rg_mem_status; - --- --- 2.3) alter memory limit with high memory usage and high memory shared usage --- - -ALTER RESOURCE GROUP rg1_memory_test SET CONCURRENCY 2; -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 40; -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 60; - --- now the group has 40%*40%-10%=6% free quota and 40%*60%=24% free shared quota, -SELECT * FROM rg_mem_status; - -1: SELECT hold_memory_by_percent(1,0.5); --- proc 1 has consumed another 50%*50%=25%, in total 30% --- now it has consumed all its 10% quota, as well as 20% shared quota --- now the group has 40%*40%-10%=6% free quota and 40%*60%-20%=4% free shared quota, - -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 20; - --- now the group has 40%*80%-10%=22% free quota and 40%*20%-20%=-12% free shared quota, --- so memory_shared_quota shall be the old value. -SELECT * FROM rg_mem_status; - -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 30; - --- now the group has 30%*80%-10%=14% free quota and 30%*20%-20%=-14% free shared quota, --- so memory_limit shall be the old value. -SELECT * FROM rg_mem_status; - -1q: - --- --- 3.1) decrease one group and increase another, no load --- - -ALTER RESOURCE GROUP rg1_memory_test SET CONCURRENCY 3; -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 30; -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 0; - -CREATE RESOURCE GROUP rg2_memory_test - WITH (concurrency=3, cpu_rate_limit=10, - memory_limit=30, memory_shared_quota=0, memory_spill_ratio=5); -CREATE ROLE role2_memory_test RESOURCE GROUP rg2_memory_test; - --- default_group and admin_group consumed 40% memory_limit, --- so with rg1+rg2=60% all memory_limit is already allocated, --- so increasing any of them shall fail. -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 31; - -SELECT * FROM rg_mem_status; - --- but increase could succeed if another rg is first decreased. 
-ALTER RESOURCE GROUP rg2_memory_test SET MEMORY_LIMIT 20; -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 40; - -SELECT * FROM rg_mem_status; - --- --- 3.2) decrease one group and increase another, with load, no pending --- - -ALTER RESOURCE GROUP rg1_memory_test SET CONCURRENCY 3; -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 30; -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 0; - -ALTER RESOURCE GROUP rg2_memory_test SET CONCURRENCY 2; -ALTER RESOURCE GROUP rg2_memory_test SET MEMORY_LIMIT 30; -ALTER RESOURCE GROUP rg2_memory_test SET MEMORY_SHARED_QUOTA 0; - -SELECT * FROM rg_mem_status; - -11: SET ROLE TO role1_memory_test; -11: BEGIN; --- proc11 gets a quota of 30%/3=10% from rg1 - -12: SET ROLE TO role1_memory_test; -12: BEGIN; --- proc12 gets a quota of 30%/3=10% from rg1 - -13: SET ROLE TO role1_memory_test; -13: BEGIN; --- proc13 gets a quota of 30%/3=10% from rg1 - --- although all the memory quota is in use, --- it's still allowed to decrease memory_limit, --- in such a case rg2 won't get the new quota until any query in rg1 ends. 
-ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 15; -ALTER RESOURCE GROUP rg2_memory_test SET MEMORY_LIMIT 40; --- now both rg1 and rg2 still have 30% quota - -21: SET ROLE TO role2_memory_test; -21: BEGIN; --- proc21 gets a quota of 40%/2=20% from rg2 - -SELECT * FROM rg_mem_status; -SELECT * FROM rg_activity_status; - -11q: --- proc11 ends, 10%-5%=5% quota is returned to sys - -12q: --- proc12 ends, 10%-5%=5% quota is returned to sys - -SELECT * FROM rg_mem_status; -SELECT * FROM rg_activity_status; - --- now rg2 shall be able to get 10% free quota from sys -22: SET ROLE TO role2_memory_test; -22: BEGIN; --- proc22 gets a quota of 40%/2=20% from rg2 - -SELECT * FROM rg_mem_status; -SELECT * FROM rg_activity_status; - -13q: -21q: -22q: - --- --- 3.3) decrease one group and increase another, with load, with pending, --- memory_shared_quota is 0, --- waken up by released quota memory from other group --- - -ALTER RESOURCE GROUP rg2_memory_test SET CONCURRENCY 2; -ALTER RESOURCE GROUP rg2_memory_test SET MEMORY_LIMIT 30; -ALTER RESOURCE GROUP rg2_memory_test SET MEMORY_SHARED_QUOTA 0; - -ALTER RESOURCE GROUP rg1_memory_test SET CONCURRENCY 3; -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 30; -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 0; - -SELECT * FROM rg_mem_status; - -11: SET ROLE TO role1_memory_test; -11: BEGIN; --- proc11 gets a quota of 30%/3=10% from rg1 - -12: SET ROLE TO role1_memory_test; -12: BEGIN; --- proc12 gets a quota of 30%/3=10% from rg1 - -13: SET ROLE TO role1_memory_test; -13: BEGIN; --- proc13 gets a quota of 30%/3=10% from rg1 - --- although all the memory quota is in use, --- it's still allowed to decrease memory_limit, --- in such a case rg2 won't get the new quota until any query in rg1 ends. 
-ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 15; -ALTER RESOURCE GROUP rg2_memory_test SET MEMORY_LIMIT 40; --- now both rg1 and rg2 still have 30% quota - -21: SET ROLE TO role2_memory_test; -21: BEGIN; - -22: SET ROLE TO role2_memory_test; -22&: BEGIN; - --- proc21 gets a quota of 40%/2=20% from rg2 --- proc22 requires a quota of 40%/2=20% from rg2, --- but as rg2 only has 30%-20%=10% free quota now, --- it shall be pending. -SELECT * FROM rg_mem_status; -SELECT * FROM rg_activity_status; - -11: END; -11q: --- proc11 ends, 10%-5%=5% quota is returned to sys - -SELECT * FROM rg_mem_status; -SELECT * FROM rg_activity_status; - -12: END; -12q: --- proc12 ends, 10%-5%=5% quota is returned to sys - --- now rg2 can get 10% free quota from sys --- so proc22 can get enough quota and get executed -22<: -SELECT * FROM rg_mem_status; -SELECT * FROM rg_activity_status; - -13q: -21q: -22q: - --- --- 3.4) decrease one group and increase another, with load, with pending, --- memory_shared_quota > 0 and can be freed, --- waken up by released shared quota memory from other group --- - -ALTER RESOURCE GROUP rg2_memory_test SET CONCURRENCY 2; -ALTER RESOURCE GROUP rg2_memory_test SET MEMORY_LIMIT 30; -ALTER RESOURCE GROUP rg2_memory_test SET MEMORY_SHARED_QUOTA 0; - -ALTER RESOURCE GROUP rg1_memory_test SET CONCURRENCY 1; -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 30; -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 60; - -SELECT * FROM rg_mem_status; - -11: SET ROLE TO role1_memory_test; -11: BEGIN; --- proc11 gets a quota of 30%*40%=12% from rg1 --- rg1 also has a shared quota of 30%*60%=18% - -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 20; --- now each slot in rg1 requires a quota of 20%*40%=8% --- rg1 has 0% free quota and 20%*60%=12% free shared quota --- rg1 should release some shared quota, 30%*60%-20%*60%=6% - -ALTER RESOURCE GROUP rg2_memory_test SET CONCURRENCY 4; -ALTER RESOURCE GROUP rg2_memory_test SET MEMORY_LIMIT 40; --- 
now rg2 has a quota of 30%+6%=36% --- now each slot in rg2 requires a quota of 40%/4=10% - -21: SET ROLE TO role2_memory_test; -21: BEGIN; -22: SET ROLE TO role2_memory_test; -22: BEGIN; -23: SET ROLE TO role2_memory_test; -23: BEGIN; --- proc21~proc23 each gets a quota of 40%/4=10% --- rg2 still has 36%-10%*3=6% free quota - -24: SET ROLE TO role2_memory_test; -24&: BEGIN; --- proc24 shall be pending. - -SELECT * FROM rg_mem_status; -SELECT * FROM rg_activity_status; - -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 30; --- now rg1 should release some shared quota, 20%*60%-20%*30%=6% --- now rg2 can get at most 6% new quota, but as it already has 36%, --- so rg2 actually gets 4% new quota. --- now rg2 has 40% quota, the free quota is 40%-30%=10%, --- just enough for proc24 to wake up. - -24<: -SELECT * FROM rg_mem_status; -SELECT * FROM rg_activity_status; - -11q: -21q: -22q: -23q: -24q: - --- --- 3.5) decrease one group and increase another, with load, with pending --- memory_shared_quota > 0 and can not be freed, --- waken up by released quota memory from other group --- - -ALTER RESOURCE GROUP rg2_memory_test SET CONCURRENCY 2; -ALTER RESOURCE GROUP rg2_memory_test SET MEMORY_LIMIT 30; -ALTER RESOURCE GROUP rg2_memory_test SET MEMORY_SHARED_QUOTA 0; - -ALTER RESOURCE GROUP rg1_memory_test SET CONCURRENCY 10; -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 30; -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 90; - -SELECT * FROM rg_mem_status; - -11: SET ROLE TO role1_memory_test; -11: BEGIN; -11: SELECT hold_memory_by_percent(1,0.90); --- proc11 gets a quota of 30%*10%/10=0.3% from rg1 --- rg1 has a free quota of 30%*10%-0.3%=2.7% --- rg1 has a shared quota of 30%*90%=27%, --- free shared quota is 27%-(30%*90%-0.3%)=0.3% - -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 20; --- now each slot in rg1 requires a quota of 20%*10%/10=0.2% --- rg1 releases some quota, 0.1%*9=0.9%, --- so new quota is 2.1%, new free quota is 
2.1%-0.3%=1.8% --- rg1 releases some shared quota, 27%-max(20%*90%,26.7%)=0.3%, --- so new shared quota is 26.7%, new free shared quota is 0% - -ALTER RESOURCE GROUP rg2_memory_test SET CONCURRENCY 4; -ALTER RESOURCE GROUP rg2_memory_test SET MEMORY_LIMIT 40; --- now rg2 has a quota of 30%+1.2%=31.2% --- now each slot in rg2 requires a quota of 40%/4=10% - -21: SET ROLE TO role2_memory_test; -21: BEGIN; -22: SET ROLE TO role2_memory_test; -22: BEGIN; -23: SET ROLE TO role2_memory_test; -23: BEGIN; --- proc21~proc23 each gets a quota of 40%/4=10% --- rg2 still has 31.2%-10%*3=1.2% free quota - -24: SET ROLE TO role2_memory_test; -24&: BEGIN; --- proc24 shall be pending. - -SELECT * FROM rg_mem_status; -SELECT * FROM rg_activity_status; - -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 30; --- rg1 can't free any shared quota as all of them are in use by proc11 - -SELECT * FROM rg_mem_status; -SELECT * FROM rg_activity_status; - -11q: --- rg1 releases 0.3%-0.2%=0.1% quota and 26.7%-18%=8.7% --- so rg2 gets 8.8% new quota --- now rg2 has 40% quota, free quota is 10% --- so proc24 shall be waken up - -24<: -SELECT * FROM rg_mem_status; -SELECT * FROM rg_activity_status; - -21q: -22q: -23q: -24q: - --- cleanup -DROP VIEW rg_mem_status; -DROP ROLE role1_memory_test; -DROP ROLE role2_memory_test; -DROP RESOURCE GROUP rg1_memory_test; -DROP RESOURCE GROUP rg2_memory_test; - --- --- Test PrepareTransaction report an error --- -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=5, memory_limit=5); -CREATE ROLE rg_test_role RESOURCE GROUP rg_test_group; - -SET debug_dtm_action = "fail_begin_command"; -SET debug_dtm_action_target = "protocol"; -SET debug_dtm_action_protocol = "prepare"; -SET debug_dtm_action_segment = 0; - --- ALTER should fail and the memory_limit in both catalog and share memory are --- still 5% -ALTER RESOURCE GROUP rg_test_group set memory_limit 1; - -RESET debug_dtm_action; -RESET debug_dtm_action_target; -RESET 
debug_dtm_action_protocol; -RESET debug_dtm_action_segment; - --- should still be 5% on both QD and QE -select memory_limit from gp_toolkit.gp_resgroup_config where groupname = 'rg_test_group'; - --- --- Test error happen on commit_prepare, DDL success after retry --- -SET debug_dtm_action = "fail_begin_command"; -SET debug_dtm_action_target = "protocol"; -SET debug_dtm_action_protocol = "commit_prepared"; -SET debug_dtm_action_segment = 0; - --- ALTER should success -ALTER RESOURCE GROUP rg_test_group set memory_limit 4; - -RESET debug_dtm_action; -RESET debug_dtm_action_target; -RESET debug_dtm_action_protocol; -RESET debug_dtm_action_segment; - --- should still be 4% on both QD and QE -select memory_limit from gp_toolkit.gp_resgroup_config where groupname = 'rg_test_group'; - -DROP ROLE rg_test_role; -DROP RESOURCE GROUP rg_test_group; diff --git a/src/test/isolation2/input/resgroup/resgroup_bypass.source b/src/test/isolation2/input/resgroup/resgroup_bypass.source index d22b09160cd..7674a547339 100644 --- a/src/test/isolation2/input/resgroup/resgroup_bypass.source +++ b/src/test/isolation2/input/resgroup/resgroup_bypass.source @@ -1,226 +1,2 @@ -DROP ROLE IF EXISTS role_bypass_test; --- start_ignore -DROP RESOURCE GROUP rg_bypass_test; --- end_ignore - --- --- setup --- - -CREATE RESOURCE GROUP rg_bypass_test WITH - (concurrency=2, cpu_rate_limit=20, memory_limit=20, memory_shared_quota=50); -CREATE ROLE role_bypass_test RESOURCE GROUP rg_bypass_test; - -CREATE OR REPLACE FUNCTION repeatPalloc(int, int) RETURNS int AS -'@abs_builddir@/../regress/regress@DLSUFFIX@', 'repeatPalloc' -LANGUAGE C READS SQL DATA; - -CREATE OR REPLACE FUNCTION hold_memory(int, int) RETURNS int AS $$ - SELECT * FROM repeatPalloc(1, $2) -$$ LANGUAGE sql; - -CREATE OR REPLACE VIEW eat_memory_on_qd_small AS - SELECT hold_memory(0,12); - -CREATE OR REPLACE VIEW eat_memory_on_qd_large AS - SELECT hold_memory(0,100); - -CREATE OR REPLACE VIEW eat_memory_on_one_slice AS - SELECT count(null) - 
FROM - gp_dist_random('gp_id') t1 - WHERE hold_memory(t1.dbid,4)=0 - ; - -CREATE OR REPLACE VIEW eat_memory_on_slices AS - SELECT count(null) - FROM - gp_dist_random('gp_id') t1, - gp_dist_random('gp_id') t2 - WHERE hold_memory(t1.dbid,4)=0 - AND hold_memory(t2.dbid,4)=0 - ; - -CREATE OR REPLACE FUNCTION round_test(float, integer) RETURNS float AS $$ - SELECT round($1 / $2) * $2 -$$ LANGUAGE sql; - -CREATE OR REPLACE VIEW memory_result AS - SELECT rsgname, ismaster, round_test(avg(memory_usage), 1) AS avg_mem - FROM( - SELECT rsgname, - CASE (j->'key')::text WHEN '"-1"'::text THEN 1 ELSE 0 END AS ismaster, - ((j->'value')->>'used')::int AS memory_usage - FROM( - SELECT rsgname, row_to_json(json_each(memory_usage::json)) AS j FROM - gp_toolkit.gp_resgroup_status - WHERE rsgname='rg_bypass_test' - )a - )b GROUP BY (rsgname, ismaster) ORDER BY rsgname, ismaster; - -GRANT ALL ON eat_memory_on_qd_small TO role_bypass_test; -GRANT ALL ON eat_memory_on_qd_large TO role_bypass_test; -GRANT ALL ON eat_memory_on_one_slice TO role_bypass_test; -GRANT ALL ON eat_memory_on_slices TO role_bypass_test; -GRANT ALL ON memory_result TO role_bypass_test; - --- --- SET command should be bypassed --- - -ALTER RESOURCE GROUP rg_bypass_test SET concurrency 0; -61: SET ROLE role_bypass_test; -61&: SELECT 1; -ALTER RESOURCE GROUP rg_bypass_test set concurrency 1; -61<: -ALTER RESOURCE GROUP rg_bypass_test set concurrency 0; -61: SET enable_hashagg to on; -61: SHOW enable_hashagg; -61: invalid_syntax; -61q: - --- --- gp_resource_group_bypass --- - -ALTER RESOURCE GROUP rg_bypass_test SET concurrency 0; -61: SET ROLE role_bypass_test; -61: SET gp_resource_group_bypass to on; -61: SHOW gp_resource_group_bypass; -61: CREATE TABLE table_bypass_test (c1 int); -61: INSERT INTO table_bypass_test SELECT generate_series(1,100); -61: SELECT count(*) FROM table_bypass_test; -61: DROP TABLE table_bypass_test; -61: SET gp_resource_group_bypass to off; -61: SHOW gp_resource_group_bypass; -61q: - --- --- 
gp_resource_group_bypass is not allowed inside a transaction block --- - -61: BEGIN; -61: SET gp_resource_group_bypass to on; -61: ABORT; -61q: - --- --- gp_resource_group_bypass is not allowed inside a function --- - -DROP FUNCTION IF EXISTS func_resgroup_bypass_test(int); -CREATE FUNCTION func_resgroup_bypass_test(c1 int) RETURNS INT AS $$ - SET gp_resource_group_bypass TO ON; /* inside a function */ - SELECT 1 -$$ LANGUAGE SQL; -SELECT func_resgroup_bypass_test(1); -DROP FUNCTION func_resgroup_bypass_test(int); - - --- --- memory limit in bypass mode, on qd --- --- orca will allocate 10M memory error buffer before optimization, and release --- it after that, so if optimizer is set to on, it will fail when the memory --- usage reaches 24M - -61: SET ROLE role_bypass_test; -61: SET gp_resource_group_bypass to on; -61: BEGIN; -61: SELECT * FROM eat_memory_on_qd_small; -SELECT * FROM memory_result; -61: SELECT * FROM eat_memory_on_qd_large; -SELECT * FROM memory_result; -61: ABORT; -61: BEGIN; -SELECT 1 FROM memory_result where avg_mem > 10 and ismaster = 1; -61q: - --- --- memory limit in bypass mode, on one slice --- - -61: SET ROLE role_bypass_test; -61: SET gp_resource_group_bypass to on; -61: BEGIN; -61: SELECT * FROM eat_memory_on_one_slice; -SELECT * FROM memory_result; -61: SELECT * FROM eat_memory_on_one_slice; -SELECT * FROM memory_result; -61: SELECT * FROM eat_memory_on_one_slice; -SELECT * FROM memory_result; -61: ABORT; -61: BEGIN; -SELECT * FROM memory_result; -61q: - --- --- memory limit in bypass mode, on slices --- - -61: SET ROLE role_bypass_test; -61: SET gp_resource_group_bypass to on; -61: BEGIN; -61: SELECT * FROM eat_memory_on_slices; -SELECT * FROM memory_result; -61: SELECT * FROM eat_memory_on_slices; -SELECT * FROM memory_result; -61: SELECT * FROM eat_memory_on_slices; -SELECT * FROM memory_result; -61: ABORT; -61: BEGIN; -SELECT * FROM memory_result; -61q: - --- --- gp_resgroup_status.num_running is updated in bypass mode --- - -61: SET 
ROLE role_bypass_test; -61: SET gp_resource_group_bypass to on; -61&: SELECT pg_sleep(10); -62: SET ROLE role_bypass_test; -62: SET gp_resource_group_bypass to on; -62&: SELECT pg_sleep(20); -SELECT num_running FROM gp_toolkit.gp_resgroup_status - WHERE rsgname='rg_bypass_test'; -SELECT pg_cancel_backend(pid) FROM pg_stat_activity - WHERE rsgname='rg_bypass_test'; -61<: -62<: -61q: -62q: - --- --- pg_stat_activity is updated in bypass mode --- - -61: SET ROLE role_bypass_test; -61: SET gp_resource_group_bypass to on; -61&: SELECT pg_sleep(10); -62: SET ROLE role_bypass_test; -62: SET gp_resource_group_bypass to on; -62&: SELECT pg_sleep(20); -SELECT query FROM pg_stat_activity - WHERE rsgname='rg_bypass_test'; -SELECT pg_cancel_backend(pid) FROM pg_stat_activity - WHERE rsgname='rg_bypass_test'; -61<: -62<: -61q: -62q: - --- --- cleanup --- - -REVOKE ALL ON eat_memory_on_qd_small FROM role_bypass_test; -REVOKE ALL ON eat_memory_on_qd_large FROM role_bypass_test; -REVOKE ALL ON eat_memory_on_one_slice FROM role_bypass_test; -REVOKE ALL ON eat_memory_on_slices FROM role_bypass_test; -REVOKE ALL ON memory_result FROM role_bypass_test; - -DROP ROLE role_bypass_test; -DROP RESOURCE GROUP rg_bypass_test; - --- vi:filetype=sql: +-- RG FIXME: The bypass mode will be re-design in the latest future, so just remove all the test case, because this +-- file is deeply dependent on the memory model which has been removed in this PR. 
\ No newline at end of file diff --git a/src/test/isolation2/input/resgroup/resgroup_cpu_rate_limit.source b/src/test/isolation2/input/resgroup/resgroup_cpu_rate_limit.source index 6d927207991..6c136a09545 100644 --- a/src/test/isolation2/input/resgroup/resgroup_cpu_rate_limit.source +++ b/src/test/isolation2/input/resgroup/resgroup_cpu_rate_limit.source @@ -85,14 +85,15 @@ CREATE VIEW cancel_all AS WHERE query LIKE 'SELECT * FROM % WHERE busy%'; -- create two resource groups -CREATE RESOURCE GROUP rg1_cpu_test WITH (concurrency=5, cpu_rate_limit=10, memory_limit=20); -CREATE RESOURCE GROUP rg2_cpu_test WITH (concurrency=5, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg1_cpu_test WITH (concurrency=5, cpu_hard_quota_limit=-1, cpu_soft_priority=100); +CREATE RESOURCE GROUP rg2_cpu_test WITH (concurrency=5, cpu_hard_quota_limit=-1, cpu_soft_priority=200); -- -- check gpdb cgroup configuration -- DO LANGUAGE PLPYTHON3U $$ import subprocess + import os cgroot = '@cgroup_mnt_point@' @@ -116,7 +117,7 @@ DO LANGUAGE PLPYTHON3U $$ shares = get_cgroup_prop('/cpu/gpdb/cpu.shares') # get system props - ncores = int(run_command('nproc')) + ncores = os.cpu_count() # get global gucs gp_resource_group_cpu_limit = float(show_guc('gp_resource_group_cpu_limit')) @@ -128,20 +129,19 @@ DO LANGUAGE PLPYTHON3U $$ # shares := 1024 * gp_resource_group_cpu_priority assert shares == 1024 * gp_resource_group_cpu_priority - # SUB/shares := TOP/shares * cpu_rate_limit def check_group_shares(name): - cpu_rate_limit = int(plpy.execute(''' + cpu_soft_priority = int(plpy.execute(''' SELECT value FROM pg_resgroupcapability c, pg_resgroup g WHERE c.resgroupid=g.oid - AND reslimittype=2 + AND reslimittype=3 AND g.rsgname='{}' '''.format(name))[0]['value']) oid = int(plpy.execute(''' SELECT oid FROM pg_resgroup WHERE rsgname='{}' '''.format(name))[0]['oid']) sub_shares = get_cgroup_prop('/cpu/gpdb/{}/cpu.shares'.format(oid)) - assert sub_shares == int(shares * cpu_rate_limit / 100) + 
assert sub_shares == int(cpu_soft_priority * 1024 / 100) # check default groups check_group_shares('default_group') @@ -152,8 +152,8 @@ DO LANGUAGE PLPYTHON3U $$ check_group_shares('rg2_cpu_test') $$; --- lower admin_group's cpu_rate_limit to minimize its side effect -ALTER RESOURCE GROUP admin_group SET cpu_rate_limit 1; +-- lower admin_group's cpu_hard_quota_limit to minimize its side effect +ALTER RESOURCE GROUP admin_group SET cpu_hard_quota_limit 1; -- create two roles and assign them to above groups CREATE ROLE role1_cpu_test RESOURCE GROUP rg1_cpu_test; @@ -180,16 +180,6 @@ GRANT ALL ON FUNCTION busy() TO role2_cpu_test; -- on empty load the cpu usage shall be 0% -- --- --- a group should burst to use all the cpu usage --- when it's the only one with running queries. --- --- however the overall cpu usage is controlled by a GUC --- gp_resource_group_cpu_limit which is 90% by default. --- --- so the cpu usage shall be 90% --- - 10&: SELECT * FROM gp_dist_random('gp_id') WHERE busy() IS NULL; 11&: SELECT * FROM gp_dist_random('gp_id') WHERE busy() IS NULL; 12&: SELECT * FROM gp_dist_random('gp_id') WHERE busy() IS NULL; @@ -197,6 +187,7 @@ GRANT ALL ON FUNCTION busy() TO role2_cpu_test; 14&: SELECT * FROM gp_dist_random('gp_id') WHERE busy() IS NULL; -- start_ignore +-- Gather CPU usage statistics into cpu_usage_samples TRUNCATE TABLE cpu_usage_samples; SELECT fetch_sample(); SELECT pg_sleep(1.7); @@ -247,9 +238,9 @@ SELECT * FROM cancel_all; -- -- when there are multiple groups with parallel queries, --- they should share the cpu usage by their cpu_usage settings, +-- they should share the cpu usage by their cpu_soft_priority settings, -- --- rg1_cpu_test:rg2_cpu_test is 0.1:0.2 => 1:2, so: +-- rg1_cpu_test:rg2_cpu_test is 100:200 => 1:2, so: -- -- - rg1_cpu_test gets 90% * 1/3 => 30%; -- - rg2_cpu_test gets 90% * 2/3 => 60%; @@ -324,10 +315,11 @@ SELECT * FROM cancel_all; 24q: -- end_ignore --- start_ignore -! 
gpconfig -c gp_resource_group_cpu_ceiling_enforcement -v on; -! gpstop -rai; --- end_ignore + + +-- Test hard quota limit +ALTER RESOURCE GROUP rg1_cpu_test set cpu_hard_quota_limit 10; +ALTER RESOURCE GROUP rg2_cpu_test set cpu_hard_quota_limit 20; -- prepare parallel queries in the two groups 10: SET ROLE TO role1_cpu_test; @@ -385,6 +377,7 @@ SELECT * FROM cancel_all; 1:SELECT pg_sleep(1.7); -- end_ignore +-- verify it 1:SELECT verify_cpu_usage('rg1_cpu_test', 10, 2); -- start_ignore @@ -411,9 +404,9 @@ SELECT * FROM cancel_all; -- -- when there are multiple groups with parallel queries, --- they should follow the ceiling enforcement of the cpu usage. +-- they should follow the enforcement of the cpu usage. -- --- rg1_cpu_test:rg2_cpu_test is 0.1:0.2, so: +-- rg1_cpu_test:rg2_cpu_test is 10:20, so: -- -- - rg1_cpu_test gets 10%; -- - rg2_cpu_test gets 20%; @@ -490,13 +483,8 @@ SELECT * FROM cancel_all; 1q: -- end_ignore --- start_ignore -! gpconfig -c gp_resource_group_cpu_ceiling_enforcement -v off; -! 
gpstop -rai; --- end_ignore - --- restore admin_group's cpu_rate_limit -2:ALTER RESOURCE GROUP admin_group SET cpu_rate_limit 10; +-- restore admin_group's cpu_hard_quota_limit +2:ALTER RESOURCE GROUP admin_group SET cpu_hard_quota_limit 10; -- cleanup 2:REVOKE ALL ON FUNCTION busy() FROM role1_cpu_test; diff --git a/src/test/isolation2/input/resgroup/resgroup_cpuset.source b/src/test/isolation2/input/resgroup/resgroup_cpuset.source index 80dda6dc8db..e01d3637c19 100644 --- a/src/test/isolation2/input/resgroup/resgroup_cpuset.source +++ b/src/test/isolation2/input/resgroup/resgroup_cpuset.source @@ -6,11 +6,9 @@ DROP TABLE IF EXISTS bigtable; CREATE LANGUAGE plpython3u; -- end_ignore -CREATE RESOURCE GROUP rg1_cpuset_test WITH (memory_limit = 10, cpuset='0'); -CREATE ROLE role1_cpuset_test RESOURCE GROUP rg1_cpuset_test; --- check whether the queries running in the specific group on the specific core set --- @param grp: the resource group queries running in +-- check whether the queries running on the specific core set +-- @param grp: the resource group name queries running in -- @param cpuset: cpu cores which the queries should only be run on them, e.g. 
0,1 -- @return bool: true/false indicating whether it corresponds to the rule CREATE FUNCTION check_cpuset(grp TEXT, cpuset TEXT) RETURNS BOOL AS $$ @@ -23,6 +21,8 @@ conn = pg.connect(dbname="isolation2resgrouptest") pt = re.compile(r'con(\d+)') def check(expect_cpus, sess_ids): + # use ps -eF to find all processes which belongs to postgres and in the given sessions + procs = subprocess.check_output(['ps', '-eF']).decode().split('\n') head, proc_stats = procs[0], procs[1:] PSR = [id for id, attr in enumerate(head.split()) if attr.strip() == 'PSR'][0] @@ -30,7 +30,7 @@ def check(expect_cpus, sess_ids): for proc_stat in proc_stats if 'postgres' in proc_stat and pt.findall(proc_stat) and - set(pt.findall(proc_stat)).issubset(sess_ids)] + sess_ids.issubset(set(pt.findall(proc_stat)))] return set(cpus).issubset(set(expect_cpus)) def get_all_sess_ids_in_group(group_name): @@ -38,7 +38,6 @@ def get_all_sess_ids_in_group(group_name): result = conn.query(sql).getresult() return set([str(r[0]) for r in result]) -expect_cpu = [] conf = cpuset if conf == '': fd = open("@cgroup_mnt_point@/cpuset/gpdb/cpuset.cpus") @@ -47,6 +46,9 @@ if conf == '': conf = line.strip('\n') tokens = conf.split(",") + +expect_cpu = [] + for token in tokens: if token.find('-') != -1: interval = token.split("-") @@ -76,7 +78,7 @@ line = fd.readline() fd.close() line = line.strip('\n') sql = "create resource group " + grp + " with (" \ - + "memory_limit=1, cpuset='" + line + "')" + + "cpuset='" + line + "')" result = conn.query(sql) file = "@cgroup_mnt_point@/cpuset/gpdb/1/cpuset.cpus" @@ -130,6 +132,7 @@ def get_cgroup_cpuset(group): conn = pg.connect(dbname="isolation2resgrouptest") config_groups = get_all_group_which_cpuset_is_set() groups_cpuset = set([]) + # check whether cpuset in config and cgroup are same, and have no overlap for config_group in config_groups: groupid = config_group[0] @@ -141,6 +144,7 @@ for config_group in config_groups: groups_cpuset |= cgroup_cpuset if 
not(config_cpuset.issubset(cgroup_cpuset) and cgroup_cpuset.issubset(config_cpuset)): return False + # check whether cpuset in resource group union default group is universal set default_cpuset = get_cgroup_cpuset(1) all_cpuset = get_cgroup_cpuset(0) @@ -181,6 +185,9 @@ CREATE VIEW cancel_all AS FROM pg_stat_activity WHERE query LIKE 'SELECT * FROM busy%'; +CREATE RESOURCE GROUP rg1_cpuset_test WITH (cpuset='0'); +CREATE ROLE role1_cpuset_test RESOURCE GROUP rg1_cpuset_test; + GRANT ALL ON busy TO role1_cpuset_test; -- we suppose core 0 & 1 are available @@ -205,8 +212,8 @@ select pg_sleep(2); 11: SELECT check_cpuset('rg1_cpuset_test', '0,1'); 11: END; --- change to cpu_rate_limit while the transaction is running -ALTER RESOURCE GROUP rg1_cpuset_test SET cpu_rate_limit 10; +-- change to cpu_hard_quota_limit while the transaction is running +ALTER RESOURCE GROUP rg1_cpuset_test SET cpu_hard_quota_limit 70; -- cancel the transaction -- start_ignore @@ -217,7 +224,7 @@ select * from cancel_all; 11q: -- end_ignore --- test whether the cpu_rate_limit had taken effect +-- test whether the cpu_hard_quota_limit had taken effect 10: SET ROLE TO role1_cpuset_test; 10: BEGIN; 10&: SELECT * FROM busy; @@ -258,15 +265,15 @@ select * from cancel_all; -- default group value must be valid -- suppose the cores numbered 0 & 1 are available SELECT check_rules(); -CREATE RESOURCE GROUP rg1_test_group WITH (memory_limit=10, cpuset='0'); +CREATE RESOURCE GROUP rg1_test_group WITH (cpuset='0'); SELECT check_rules(); -CREATE RESOURCE GROUP rg2_test_group WITH (memory_limit=10, cpuset='1'); +CREATE RESOURCE GROUP rg2_test_group WITH (cpuset='1'); SELECT check_rules(); -ALTER RESOURCE GROUP rg1_test_group SET cpu_rate_limit 1; +ALTER RESOURCE GROUP rg1_test_group SET cpu_hard_quota_limit 1; SELECT check_rules(); ALTER RESOURCE GROUP rg1_test_group SET cpuset '0'; SELECT check_rules(); -ALTER RESOURCE GROUP rg1_test_group SET cpu_rate_limit 1; +ALTER RESOURCE GROUP rg1_test_group SET 
cpu_hard_quota_limit 1; SELECT check_rules(); DROP RESOURCE GROUP rg1_test_group; SELECT check_rules(); @@ -281,7 +288,7 @@ SELECT check_rules(); -- negative: simulate DDL fail -- create fail SELECT gp_inject_fault('create_resource_group_fail', 'error', 1); -CREATE RESOURCE GROUP rg1_test_group WITH (memory_limit=10, cpuset='0'); +CREATE RESOURCE GROUP rg1_test_group WITH (cpuset='0'); SELECT groupid, groupname, cpuset FROM gp_toolkit.gp_resgroup_config WHERE cpuset != '-1'; diff --git a/src/test/isolation2/input/resgroup/resgroup_memory_limit.source b/src/test/isolation2/input/resgroup/resgroup_memory_limit.source deleted file mode 100644 index 351113dec98..00000000000 --- a/src/test/isolation2/input/resgroup/resgroup_memory_limit.source +++ /dev/null @@ -1,504 +0,0 @@ --- start_ignore -DROP ROLE IF EXISTS role1_memory_test; -DROP RESOURCE GROUP rg1_memory_test; -DROP RESOURCE GROUP rg2_memory_test; --- end_ignore - -CREATE OR REPLACE FUNCTION resGroupPalloc(float) RETURNS int AS -'@abs_builddir@/../regress/regress@DLSUFFIX@', 'resGroupPalloc' -LANGUAGE C READS SQL DATA; - -CREATE OR REPLACE FUNCTION hold_memory_by_percent(float) RETURNS int AS $$ - SELECT * FROM resGroupPalloc($1) -$$ LANGUAGE sql; - -CREATE OR REPLACE VIEW rg_mem_status AS - SELECT groupname, memory_limit, memory_shared_quota - FROM gp_toolkit.gp_resgroup_config - WHERE groupname='rg1_memory_test' OR groupname='rg2_memory_test' - ORDER BY groupid; - -CREATE OR REPLACE VIEW memory_result AS SELECT rsgname, memory_usage from gp_toolkit.gp_resgroup_status; - --- 1) single allocation --- Group Share Quota = 0 --- Global Share Quota > 0 --- Slot Quota > 0 --- ----------------------- - --- we assume system total chunks is 100% --- rg1's expected: 100% * 52% => 52% --- rg1's slot quota: 52% / 2 * 2 => 52% --- rg1's single slot quota: 52% / 2 => 26% --- rg1's shared quota: %52 - %52 => %0 --- system free chunks: 100% - 10% - 30% - 52% => 8% --- memory available to one slot in rg1: 52%/2 + 0% + 8% => 34% 
-CREATE RESOURCE GROUP rg1_memory_test - WITH (concurrency=2, cpu_rate_limit=10, - memory_limit=52, memory_shared_quota=0); -CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; - --- 1a) on QD -1: SET ROLE TO role1_memory_test; -1: SELECT hold_memory_by_percent(0.14 / 0.52); -1: SELECT hold_memory_by_percent(0.14 / 0.52); -1: SELECT hold_memory_by_percent(0.14 / 0.52); -1q: - -1: SET ROLE TO role1_memory_test; -1: BEGIN; -1: SELECT hold_memory_by_percent(0.14 / 0.52); -1: SELECT hold_memory_by_percent(0.14 / 0.52); -1: SELECT hold_memory_by_percent(0.14 / 0.52); -1q: - -1: SET ROLE TO role1_memory_test; -1: SELECT hold_memory_by_percent(0.42 / 0.52); -1q: - --- 1b) on QEs -1: SET ROLE TO role1_memory_test; -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.14 / 0.52)=0; -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.14 / 0.52)=0; -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.14 / 0.52)=0; -1q: - -1: SET ROLE TO role1_memory_test; -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.42 / 0.52)=0; -1q: - -DROP ROLE role1_memory_test; -DROP RESOURCE GROUP rg1_memory_test; - --- 2) single allocation --- Group Share Quota > 0 --- Global Share Quota > 0 --- Slot Quota > 0 --- ----------------------- - --- we assume system total chunks is 100% --- rg1's expected: 100% * 52 / 100 => 52% --- rg1's slot quota: 52% * 60 /100 => 31% --- rg1's single slot quota: 31% / 2 => 15.5% --- rg1's shared quota: 52% - 31% => 21% --- system free chunks: 100% - 10% - 30% - 52% => 8% --- memory available to one slot in rg1: 15.5% + 21% + 8% => 44.5% - -CREATE RESOURCE GROUP rg1_memory_test - WITH (concurrency=2, cpu_rate_limit=10, - memory_limit=52, memory_shared_quota=40); -CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; - --- 2a) on QD -1: SET ROLE TO role1_memory_test; -1: SELECT hold_memory_by_percent(0.12 / 0.52); -1: 
SELECT hold_memory_by_percent(0.12 / 0.52); -1: SELECT hold_memory_by_percent(0.12 / 0.52); -1: SELECT hold_memory_by_percent(0.12 / 0.52); -1q: - -1: SET ROLE TO role1_memory_test; -1: BEGIN; -1: SELECT hold_memory_by_percent(0.12 / 0.52); -1: SELECT hold_memory_by_percent(0.12 / 0.52); -1: SELECT hold_memory_by_percent(0.12 / 0.52); -1: SELECT hold_memory_by_percent(0.12 / 0.52); -1q: - -1: SET ROLE TO role1_memory_test; -1: SELECT hold_memory_by_percent(0.48 / 0.52); -1q: - --- 2b) on QEs -1: SET ROLE TO role1_memory_test; -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.12 / 0.52)=0; -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.12 / 0.52)=0; -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.12 / 0.52)=0; -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.12 / 0.52)=0; -1q: - -1: SET ROLE TO role1_memory_test; -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.48 / 0.52)=0; -1q: - -DROP ROLE role1_memory_test; -DROP RESOURCE GROUP rg1_memory_test; - --- 3) single allocation --- Group Share Quota > 0 --- Global Share Quota > 0 --- Slot Quota = 0 --- ----------------------- - --- we assume system total chunks is 100% --- rg1's expected: 100% * 52 / 100 => 52% --- rg1's slot quota: 0 --- rg1's shared quota: 52% --- system free chunks: 100% - 10% - 30% - 52% => 8% --- memory available to one slot in rg1: 52% + 8% => 60% - -CREATE RESOURCE GROUP rg1_memory_test - WITH (concurrency=2, cpu_rate_limit=10, - memory_limit=52, memory_shared_quota=100); -CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; - --- 3a) on QD -1: SET ROLE TO role1_memory_test; -1: SELECT hold_memory_by_percent(0.25 / 0.52); -1: SELECT hold_memory_by_percent(0.25 / 0.52); -1: SELECT hold_memory_by_percent(0.25 / 0.52); -1q: - -1: SET ROLE TO role1_memory_test; -1: BEGIN; -1: SELECT 
hold_memory_by_percent(0.25 / 0.52); -1: SELECT hold_memory_by_percent(0.25 / 0.52); -1: SELECT hold_memory_by_percent(0.25 / 0.52); -1q: - -1: SET ROLE TO role1_memory_test; -1: SELECT hold_memory_by_percent(0.75 / 0.52); -1q: - --- 3b) on QEs -1: SET ROLE TO role1_memory_test; -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.25 / 0.52)=0; -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.25 / 0.52)=0; -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.25 / 0.52)=0; -1q: - -1: SET ROLE TO role1_memory_test; -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.75 / 0.52)=0; -1q: - -DROP ROLE role1_memory_test; -DROP RESOURCE GROUP rg1_memory_test; - --- 4) multi allocation in one group --- Group Share Quota = 0 --- Global Share Quota > 0 --- Slot Quota > 0 --- ----------------------- - --- we assume system total chunks is 100% --- rg1's expected: 100% * 52 / 100 => 52% --- rg1's slot quota: 52% / 2 * 2 => 52% --- rg1's single slot quota: 52% / 2 => 26% --- rg1's shared quota: 0 --- system free chunks: 100% - 10% - 30% - 52% => 8% --- memory available to one slot in rg1: 26% + 8% => 34% - -CREATE RESOURCE GROUP rg1_memory_test - WITH (concurrency=2, cpu_rate_limit=10, - memory_limit=52, memory_shared_quota=0); -CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; - --- 4a) on QD --- not exceed the global share -1: SET ROLE TO role1_memory_test; -2: SET ROLE TO role1_memory_test; -1: BEGIN; -2: BEGIN; -1: SELECT hold_memory_by_percent(0.28 / 0.52); -2: SELECT hold_memory_by_percent(0.28 / 0.52); -1q: -2q: - --- exceed the global share -1: SET ROLE TO role1_memory_test; -2: SET ROLE TO role1_memory_test; -1: BEGIN; -2: BEGIN; -1: SELECT hold_memory_by_percent(0.32 / 0.52); -2: SELECT hold_memory_by_percent(0.32 / 0.52); -1q: -2q: - --- allocate serially -1: SET ROLE TO role1_memory_test; -2: SET ROLE TO 
role1_memory_test; -1: BEGIN; -2: BEGIN; -1: SELECT hold_memory_by_percent(0.32 / 0.52); -1q: -SELECT pg_sleep(1); -2: SELECT hold_memory_by_percent(0.32 / 0.52); -2q: - --- 4b) on QEs --- not exceed the global share -1: SET ROLE TO role1_memory_test; -2: SET ROLE TO role1_memory_test; -1: BEGIN; -2: BEGIN; -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.28 / 0.52)=0; -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.28 / 0.52)=0; -1q: -2q: - --- exceed the global share -1: SET ROLE TO role1_memory_test; -2: SET ROLE TO role1_memory_test; -1: BEGIN; -2: BEGIN; -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.32 / 0.52)=0; -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.32 / 0.52)=0; -1q: -2q: - --- allocate serially -1: SET ROLE TO role1_memory_test; -2: SET ROLE TO role1_memory_test; -1: BEGIN; -2: BEGIN; -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.32 / 0.52)=0; -1q: -SELECT pg_sleep(1); -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.32 / 0.52)=0; -2q: - -DROP ROLE role1_memory_test; -DROP RESOURCE GROUP rg1_memory_test; - --- 5) multi allocation in one group --- Group Share Quota > 0 --- Global Share Quota > 0 --- Slot Quota > 0 --- ----------------------- - --- we assume system total chunks is 100% --- rg1's expected: 100% * 52 / 100 => 52% --- rg1's slot quota: 52% * 50 / 100 => 26% --- rg1's single slot quota: 26% / 2 => 13% --- rg1's shared quota: 52% - 13% * 2 => 26% --- system free chunks: 100% - 10% - 30% - 52% => 8% --- memory available to one slot in rg1: 13% + 26% + 8% => 47% - -CREATE RESOURCE GROUP rg1_memory_test - WITH (concurrency=2, cpu_rate_limit=10, - memory_limit=52, memory_shared_quota=50); -CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; - --- 5a) on QD --- not exceed the global share -1: SET ROLE TO 
role1_memory_test; -2: SET ROLE TO role1_memory_test; -1: BEGIN; -2: BEGIN; --- reserve all the group shared quota -1: SELECT hold_memory_by_percent(0.39 / 0.52); --- must allocate from global share -2: SELECT hold_memory_by_percent(0.2 / 0.52); -1q: -2q: - --- exceed the global share -1: SET ROLE TO role1_memory_test; -2: SET ROLE TO role1_memory_test; -1: BEGIN; -2: BEGIN; -1: SELECT hold_memory_by_percent(0.39 / 0.52); -2: SELECT hold_memory_by_percent(0.39 / 0.52); -1q: -2q: - --- allocate serially -1: SET ROLE TO role1_memory_test; -2: SET ROLE TO role1_memory_test; -1: BEGIN; -2: BEGIN; -1: SELECT hold_memory_by_percent(0.39 / 0.52); -1q: -SELECT pg_sleep(1); -2: SELECT hold_memory_by_percent(0.39 / 0.52); -2q: - --- 5b) on QEs --- not exceed the global share -1: SET ROLE TO role1_memory_test; -2: SET ROLE TO role1_memory_test; -1: BEGIN; -2: BEGIN; --- reserve all the group shared quota -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.39 / 0.52)=0; --- must allocate from global share -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.2 / 0.52)=0; -1q: -2q: - --- exceed the global share -1: SET ROLE TO role1_memory_test; -2: SET ROLE TO role1_memory_test; -1: BEGIN; -2: BEGIN; -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.39 / 0.52)=0; -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.39 / 0.52)=0; -1q: -2q: - --- allocate serially -1: SET ROLE TO role1_memory_test; -2: SET ROLE TO role1_memory_test; -1: BEGIN; -2: BEGIN; -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.39 / 0.52)=0; -1q: -SELECT pg_sleep(1); -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.39 / 0.52)=0; -2q: - -DROP ROLE role1_memory_test; -DROP RESOURCE GROUP rg1_memory_test; - --- 6) multi allocation in different group --- Group Share Quota > 0 --- Global Share Quota > 0 
--- Slot Quota > 0 --- ----------------------- - --- we assume system total chunks is 100% --- rg1's expected: 100% * 20 / 100 => 20% --- rg1's slot quota: 20% * 60 / 100 / 2 * 2 => 12% --- rg1's single slot quota: 12% / 2 => 6% --- rg1's shared quota: 20% - 6% * 2 => 8% --- rg2 same as rg1 --- system free chunks: 100% - 10% - 30% - 100%*20/100 - 100%*20/100 => 20% --- memory available to one slot in rg1/rg2: 6% + 8% + 20% => 34% - -CREATE RESOURCE GROUP rg1_memory_test - WITH (concurrency=2, cpu_rate_limit=10, - memory_limit=20, memory_shared_quota=40); -CREATE RESOURCE GROUP rg2_memory_test - WITH (concurrency=2, cpu_rate_limit=10, - memory_limit=20, memory_shared_quota=40); -CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; -CREATE ROLE role2_memory_test RESOURCE GROUP rg2_memory_test; - --- 6a) on QD --- not exceed the global share -1: SET ROLE TO role1_memory_test; -2: SET ROLE TO role2_memory_test; -1: BEGIN; -2: BEGIN; -1: SELECT hold_memory_by_percent(0.2 / 0.2); -2: SELECT hold_memory_by_percent(0.2 / 0.2); -1q: -2q: - --- exceed the global share -1: SET ROLE TO role1_memory_test; -2: SET ROLE TO role2_memory_test; -1: BEGIN; -2: BEGIN; -1: SELECT hold_memory_by_percent(0.3 / 0.2); -2: SELECT hold_memory_by_percent(0.3 / 0.2); -1q: -2q: - --- allocate serially -1: SET ROLE TO role1_memory_test; -2: SET ROLE TO role2_memory_test; -1: BEGIN; -2: BEGIN; -1: SELECT hold_memory_by_percent(0.3 / 0.2); -1q: -SELECT pg_sleep(1); -2: SELECT hold_memory_by_percent(0.3 / 0.2); -2q: - --- 6b) on QEs --- not exceed the global share -1: SET ROLE TO role1_memory_test; -2: SET ROLE TO role2_memory_test; -1: BEGIN; -2: BEGIN; -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.2 / 0.2)=0; -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.2 / 0.2)=0; -1q: -2q: - --- exceed the global share -1: SET ROLE TO role1_memory_test; -2: SET ROLE TO role2_memory_test; -1: BEGIN; -2: BEGIN; -1: SELECT 
count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.3 / 0.2)=0; -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.3 / 0.2)=0; -1q: -2q: - --- allocate serially -1: SET ROLE TO role1_memory_test; -2: SET ROLE TO role2_memory_test; -1: BEGIN; -2: BEGIN; -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.3 / 0.2)=0; -1q: -SELECT pg_sleep(1); -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.3 / 0.2)=0; -2q: - -DROP ROLE role1_memory_test; -DROP ROLE role2_memory_test; -DROP RESOURCE GROUP rg1_memory_test; -DROP RESOURCE GROUP rg2_memory_test; - --- 7) DBA can increase global shared memory by decreasing --- any existing group_memory_limit --- ----------------------- - --- we assume system total chunks is 100% --- rg1's expected: 100% * 30 / 100 => 30% --- rg1's slot quota: 30% --- rg1's single slot quota: 30% / 2 => 15% --- rg1's shared quota: 0 --- rg2 same as rg1 --- system free chunks: 100% - 10% - 30% - 30% - 30% => 0 --- memory available to one slot in rg1/rg2: 15% + 0 + 0 => 15% - -CREATE RESOURCE GROUP rg1_memory_test - WITH (concurrency=2, cpu_rate_limit=10, - memory_limit=30, memory_shared_quota=0); -CREATE RESOURCE GROUP rg2_memory_test - WITH (concurrency=2, cpu_rate_limit=10, - memory_limit=30, memory_shared_quota=0); -CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; -CREATE ROLE role2_memory_test RESOURCE GROUP rg2_memory_test; - --- 7a) on QD --- not enough memory -1: SET ROLE TO role1_memory_test; -1: SELECT hold_memory_by_percent(0.2 / 0.3); -1q: - --- alter rg2 memory_limit so last query has enough memory -ALTER RESOURCE GROUP rg2_memory_test SET memory_limit 20; --- system free chunks: 100% - 10% - 30% - 30% - 20% => 10% --- memory available to one slot in rg1/rg2: 15% + 0 + 10% => 25% - --- enough memory for allocating -1: SET ROLE TO role1_memory_test; -1: SELECT hold_memory_by_percent(0.2 / 0.3); -1q: - --- 
7b) on QEs --- not enough memory -ALTER RESOURCE GROUP rg2_memory_test SET memory_limit 30; -1: SET ROLE TO role1_memory_test; -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.2 / 0.3)=0; -1q: - --- alter rg2 memory_limit so last query has enough memory -ALTER RESOURCE GROUP rg2_memory_test SET memory_limit 20; --- system free chunks: 100% - 10% - 30% - 30% - 20% => 10% --- memory available to one slot in rg1/rg2: 15% + 0 + 10% => 25% - --- enough memory for allocating -1: SET ROLE TO role1_memory_test; -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.2 / 0.3)=0; -1q: - -DROP ROLE role1_memory_test; -DROP ROLE role2_memory_test; -DROP RESOURCE GROUP rg1_memory_test; -DROP RESOURCE GROUP rg2_memory_test; diff --git a/src/test/isolation2/input/resgroup/resgroup_memory_runaway.source b/src/test/isolation2/input/resgroup/resgroup_memory_runaway.source deleted file mode 100644 index 0e99446ab25..00000000000 --- a/src/test/isolation2/input/resgroup/resgroup_memory_runaway.source +++ /dev/null @@ -1,171 +0,0 @@ --- start_ignore -DROP ROLE IF EXISTS role1_memory_test; -DROP RESOURCE GROUP rg1_memory_test; -DROP RESOURCE GROUP rg2_memory_test; --- end_ignore - -CREATE OR REPLACE FUNCTION resGroupPalloc(float) RETURNS int AS -'@abs_builddir@/../regress/regress@DLSUFFIX@', 'resGroupPalloc' -LANGUAGE C READS SQL DATA; - -CREATE OR REPLACE FUNCTION hold_memory_by_percent(float) RETURNS int AS $$ - SELECT * FROM resGroupPalloc($1) -$$ LANGUAGE sql; - -CREATE OR REPLACE VIEW rg_mem_status AS - SELECT groupname, memory_limit, memory_shared_quota - FROM gp_toolkit.gp_resgroup_config - WHERE groupname='rg1_memory_test' OR groupname='rg2_memory_test' - ORDER BY groupid; - -CREATE OR REPLACE VIEW memory_result AS SELECT rsgname, memory_usage from gp_toolkit.gp_resgroup_status; - --- start_ignore -! gpconfig -c runaway_detector_activation_percent -v 50; -! 
gpstop -ari; --- end_ignore - --- after the restart we need a new connection to run the queries --- 1) single allocation --- Group Share Quota = 0 --- Global Share Quota > 0 --- Slot Quota > 0 --- ----------------------- - --- we assume system total chunks is 100% --- rg1's expected: 100% * 20% => 20% --- rg1's slot quota: 20% / 2 * 2 => 20% --- rg1's single slot quota: 20% / 2 => 10% --- rg1's shared quota: 20% - 20% => %0 --- system free chunks: 100% - 10% - 30% - 20% => 40% --- global area safe threshold: 40% / 2 = 20% -1: CREATE RESOURCE GROUP rg1_memory_test - WITH (concurrency=2, cpu_rate_limit=10, - memory_limit=20, memory_shared_quota=0); -1: CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; --- 1a) on QD -1: SET ROLE TO role1_memory_test; -1: SELECT hold_memory_by_percent(1.0); -1: SELECT hold_memory_by_percent(0.3); -1: SELECT hold_memory_by_percent(0.3); -1q: - --- 1b) on QEs -2: SELECT pg_sleep(1); -2: SET ROLE TO role1_memory_test; -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(1.0)=0; -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.3)=0; -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.3)=0; -2q: - -0: DROP ROLE role1_memory_test; -0: DROP RESOURCE GROUP rg1_memory_test; -0q: - - --- we assume system total chunks is 100% --- rg1's expected: 100% * 20% => 20% --- rg1's slot quota: 20% / 2 => 10% --- rg1's single slot quota: 10% / 2 => 5% --- rg1's shared quota: %20 - %10 => %10 --- system free chunks: 100% - 10% - 30% - 20% => 40% --- safe threshold: 40% / 2 = 20% -1: CREATE RESOURCE GROUP rg1_memory_test - WITH (concurrency=2, cpu_rate_limit=10, - memory_limit=20, memory_shared_quota=50); -1: CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; --- 1a) on QD -1: SET ROLE TO role1_memory_test; -1: SELECT hold_memory_by_percent(1.0); -1: SELECT hold_memory_by_percent(0.3); -1: SELECT hold_memory_by_percent(0.3); -1: SELECT 
hold_memory_by_percent(0.3); -1q: - --- 1b) on QEs -2: SELECT pg_sleep(1); -2: SET ROLE TO role1_memory_test; -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(1.0)=0; -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.3)=0; -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.3)=0; -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.3)=0; -2q: - -0: DROP ROLE role1_memory_test; -0: DROP RESOURCE GROUP rg1_memory_test; -0q: - - - --- we assume system total chunks is 100% --- rg1's expected: 100% * 20% => 20% --- rg1's slot quota: 20% / 2 => 10% --- rg1's single slot quota: 10% / 2 => 5% --- rg1's shared quota: %20 - %10 => %10 --- rg2's expected: 100% * 20% => 20% --- system free chunks: 100% - 10% - 30% - 20% - 20%=> 20% --- safe threshold: 20% / 2 = 10% -1: CREATE RESOURCE GROUP rg1_memory_test - WITH (concurrency=2, cpu_rate_limit=10, - memory_limit=20, memory_shared_quota=50); -1: CREATE RESOURCE GROUP rg2_memory_test - WITH (concurrency=2, cpu_rate_limit=10, - memory_limit=20, memory_shared_quota=0); -1: CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; --- 1a) on QD -1: SET ROLE TO role1_memory_test; -1: SELECT hold_memory_by_percent(1.0); -1: SELECT hold_memory_by_percent(0.15); -1: SELECT hold_memory_by_percent(0.15); -1q: - --- 1b) on QEs -2: SELECT pg_sleep(1); -2: SET ROLE TO role1_memory_test; -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(1.0)=0; -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.15)=0; -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.15)=0; -2q: - -0: DROP ROLE role1_memory_test; -0: DROP RESOURCE GROUP rg1_memory_test; -0: DROP RESOURCE GROUP rg2_memory_test; -0q: - --- test for the rounding issue of runaway_detector_activation_percent --- when calculating safeChunksThreshold, we 
used to multiply --- runaway_detector_activation_percent and then divide 100. This will --- cause the small chunks to be rounded to zero. --- set runaway_detector_activation_percent to 99 to enlarge the rounding --- issue - --- start_ignore -! gpconfig -c runaway_detector_activation_percent -v 99; -! gpstop -ari; --- end_ignore - -1: CREATE RESOURCE GROUP rg1_memory_test - WITH (concurrency=2, cpu_rate_limit=10, - memory_limit=60, memory_shared_quota=50); -1: CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; --- trigger small chunks rounding issue by reducing memory limit in small step --- while increasing memory limit in big step. -1: ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 57; -1: ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 54; -1: ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 51; -1: ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 48; -1: ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 60; --- 1a) on QD -1: SET ROLE TO role1_memory_test; -1: SELECT hold_memory_by_percent(0.1); -1: SELECT hold_memory_by_percent(0.1); -1q: - -0: DROP ROLE role1_memory_test; -0: DROP RESOURCE GROUP rg1_memory_test; -0q: - --- start_ignore -! gpconfig -c runaway_detector_activation_percent -v 100; -! 
gpstop -ari; --- end_ignore diff --git a/src/test/isolation2/input/resgroup/resgroup_memory_statistic.source b/src/test/isolation2/input/resgroup/resgroup_memory_statistic.source deleted file mode 100644 index d4ca737dbf7..00000000000 --- a/src/test/isolation2/input/resgroup/resgroup_memory_statistic.source +++ /dev/null @@ -1,155 +0,0 @@ -DROP ROLE IF EXISTS role1_memory_test; -DROP ROLE IF EXISTS role2_memory_test; --- start_ignore -DROP RESOURCE GROUP rg1_memory_test; -DROP RESOURCE GROUP rg2_memory_test; --- end_ignore - -CREATE OR REPLACE FUNCTION repeatPalloc(int, int) RETURNS int AS -'@abs_builddir@/../regress/regress@DLSUFFIX@', 'repeatPalloc' -LANGUAGE C READS SQL DATA; - -CREATE OR REPLACE FUNCTION hold_memory(int, int) RETURNS int AS $$ - SELECT * FROM repeatPalloc(1, $2) -$$ LANGUAGE sql; - -CREATE OR REPLACE VIEW eat_memory_on_qd AS - SELECT hold_memory(0,20); - -CREATE OR REPLACE VIEW eat_memory_on_one_slice2 AS - SELECT count(null) > 0 - FROM - gp_dist_random('gp_id') t1 - WHERE hold_memory(t1.dbid,20)=0 - ; - -CREATE OR REPLACE VIEW eat_memory_on_slices2 AS - SELECT count(null) > 0 - FROM - gp_dist_random('gp_id') t1, - gp_dist_random('gp_id') t2 - WHERE hold_memory(t1.dbid,20)=0 - AND hold_memory(t2.dbid,20)=0 - ; - -CREATE OR REPLACE FUNCTION round_test(float, integer) RETURNS float AS $$ - SELECT round($1 / $2) * $2 -$$ LANGUAGE sql; - -CREATE OR REPLACE VIEW memory_result AS - SELECT rsgname, ismaster, round_test(avg(memory_usage), 10) AS avg_mem - FROM( - SELECT rsgname, - CASE (j->'key')::text WHEN '"-1"'::text THEN 1 ELSE 0 END AS ismaster, - ((j->'value')->>'used')::int AS memory_usage - FROM( - SELECT rsgname, row_to_json(json_each(memory_usage::json)) AS j FROM - gp_toolkit.gp_resgroup_status - WHERE rsgname='rg1_memory_test' OR rsgname='rg2_memory_test' - )a - )b GROUP BY (rsgname, ismaster) ORDER BY rsgname, ismaster; - -CREATE RESOURCE GROUP rg1_memory_test - WITH (concurrency=2, cpu_rate_limit=10, memory_limit=30); -CREATE ROLE 
role1_memory_test RESOURCE GROUP rg1_memory_test; -CREATE RESOURCE GROUP rg2_memory_test - WITH (concurrency=2, cpu_rate_limit=10, memory_limit=30); -CREATE ROLE role2_memory_test RESOURCE GROUP rg2_memory_test; - -GRANT ALL ON eat_memory_on_qd TO role1_memory_test; -GRANT ALL ON eat_memory_on_one_slice2 TO role1_memory_test; -GRANT ALL ON eat_memory_on_slices2 TO role1_memory_test; -GRANT ALL ON memory_result TO role1_memory_test; - -GRANT ALL ON eat_memory_on_qd TO role2_memory_test; -GRANT ALL ON eat_memory_on_one_slice2 TO role2_memory_test; -GRANT ALL ON eat_memory_on_slices2 TO role2_memory_test; -GRANT ALL ON memory_result TO role2_memory_test; - --- 1.1) QD only in transaction -1: SET ROLE TO role1_memory_test; --- check initial state -SELECT * FROM memory_result; -1: BEGIN; -1: SELECT * FROM eat_memory_on_qd; -SELECT * FROM memory_result; -1q: - --- 1.2) QD only -1: SET ROLE TO role1_memory_test; --- check initial state -SELECT * FROM memory_result; -1: SELECT * FROM eat_memory_on_qd; -SELECT * FROM memory_result; -1q: - --- 2.1) QEs on one slice -1: SET ROLE TO role1_memory_test; -1: SELECT * FROM eat_memory_on_one_slice2; -SELECT * FROM memory_result; -1q: - --- 2.2) QEs on one slice in transaction -1: SET ROLE TO role1_memory_test; -1: BEGIN; -1: SELECT * FROM eat_memory_on_one_slice2; -SELECT * FROM memory_result; -1q: - --- 2.3) QEs on one slice change resource group -1: SET ROLE TO role1_memory_test; -1: SELECT * FROM eat_memory_on_one_slice2; -SELECT * FROM memory_result; -1: SET ROLE TO role2_memory_test; -1: SELECT * FROM eat_memory_on_one_slice2; -SELECT * FROM memory_result; -1q: - --- 3) QEs on multiple slices -1: SET ROLE TO role1_memory_test; -1: SELECT * FROM eat_memory_on_slices2; -SELECT * FROM memory_result; -1q: - --- recheck after cleanup -SELECT * FROM memory_result; - --- 4) single role concurrency test -1: SET ROLE TO role1_memory_test; -2: SET ROLE TO role1_memory_test; --- QEs on multiple slices -1: SELECT * FROM 
eat_memory_on_slices2; -2: SELECT * FROM eat_memory_on_slices2; -SELECT * FROM memory_result; -1q: -2q: - --- 5) multi role concurrency test -1: SET ROLE TO role1_memory_test; -2: SET ROLE TO role2_memory_test; --- QEs on multiple slices -1: SELECT * FROM eat_memory_on_slices2; -2: SELECT * FROM eat_memory_on_slices2; -SELECT * FROM memory_result; -1q: -2q: - --- cleanup -REVOKE ALL ON eat_memory_on_qd FROM role1_memory_test; -REVOKE ALL ON eat_memory_on_one_slice2 FROM role1_memory_test; -REVOKE ALL ON eat_memory_on_slices2 FROM role1_memory_test; -REVOKE ALL ON memory_result FROM role1_memory_test; - -REVOKE ALL ON eat_memory_on_qd FROM role2_memory_test; -REVOKE ALL ON eat_memory_on_one_slice2 FROM role2_memory_test; -REVOKE ALL ON eat_memory_on_slices2 FROM role2_memory_test; -REVOKE ALL ON memory_result FROM role2_memory_test; - -ALTER ROLE role1_memory_test RESOURCE GROUP none; -ALTER ROLE role2_memory_test RESOURCE GROUP none; - -DROP ROLE role1_memory_test; -DROP ROLE role2_memory_test; -DROP RESOURCE GROUP rg1_memory_test; -DROP RESOURCE GROUP rg2_memory_test; -DROP VIEW memory_result; diff --git a/src/test/isolation2/input/resgroup/resgroup_move_query.source b/src/test/isolation2/input/resgroup/resgroup_move_query.source index b686993978e..4287f93929a 100644 --- a/src/test/isolation2/input/resgroup/resgroup_move_query.source +++ b/src/test/isolation2/input/resgroup/resgroup_move_query.source @@ -11,17 +11,6 @@ -- -- end_matchsubs -CREATE OR REPLACE FUNCTION resGroupPalloc(float) RETURNS int AS -'@abs_builddir@/../regress/regress@DLSUFFIX@', 'resGroupPalloc' -LANGUAGE C READS SQL DATA; - -CREATE OR REPLACE FUNCTION hold_memory_by_percent(int, float) RETURNS int AS $$ - SELECT * FROM resGroupPalloc($2) -$$ LANGUAGE sql; - -CREATE OR REPLACE FUNCTION hold_memory_by_percent_on_qe(int, float) RETURNS int AS $$ - SELECT resGroupPalloc($2) FROM gp_dist_random('gp_id') -$$ LANGUAGE sql; -- check whether a query running in the specific group -- @param pid: the pid 
of QD -- @param groupname: resource group id @@ -63,7 +52,7 @@ DROP ROLE IF EXISTS role_move_query; -- start_ignore DROP RESOURCE GROUP rg_move_query; -- end_ignore -CREATE RESOURCE GROUP rg_move_query WITH (concurrency=1, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_move_query WITH (concurrency=1, cpu_hard_quota_limit=20); CREATE ROLE role_move_query RESOURCE GROUP rg_move_query; -- test1: cannot move IDLE sessions @@ -83,67 +72,5 @@ SELECT is_session_in_group(pid, 'default_group') FROM pg_stat_activity WHERE wai 2<: 2: END; --- test3: cannot move sessions that don't have enough memory on QD -CREATE RESOURCE GROUP rg_move_query_mem_small WITH (concurrency=1, cpu_rate_limit=20, memory_limit=10); -CREATE ROLE role_move_query_mem_small RESOURCE GROUP rg_move_query_mem_small; -1: SET ROLE role_move_query; -1: BEGIN; -1: SELECT hold_memory_by_percent(1,1.0); -SELECT pg_resgroup_move_query(pid, 'rg_move_query_mem_small') FROM pg_stat_activity WHERE query LIKE '%hold_memory_by_percent%' AND state = 'idle in transaction'; -SELECT is_session_in_group(pid, 'rg_move_query_mem_small') FROM pg_stat_activity WHERE query LIKE '%hold_memory_by_percent%' AND state = 'idle in transaction'; -1: END; -1q: - --- test4: cannot move sessions that don't have enough memory on QE -1: SET ROLE role_move_query; -1: BEGIN; -1: SELECT hold_memory_by_percent_on_qe(1,1.0); -SELECT pg_resgroup_move_query(pid, 'rg_move_query_mem_small') FROM pg_stat_activity WHERE query LIKE '%hold_memory_by_percent_on_qe%' AND state = 'idle in transaction'; -SELECT is_session_in_group(pid, 'rg_move_query_mem_small') FROM pg_stat_activity WHERE query LIKE '%hold_memory_by_percent_on_qe%' AND state = 'idle in transaction'; -1: END; -1q: - --- test5: move query will wait if the destination group doesn't have slot -1: SET ROLE role_move_query; -1: BEGIN; -1: SELECT hold_memory_by_percent_on_qe(1,0.1); -2: SET ROLE role_move_query_mem_small; -2: BEGIN; -3&: SELECT pg_resgroup_move_query(pid, 
'rg_move_query_mem_small') FROM pg_stat_activity WHERE query LIKE '%hold_memory_by_percent_on_qe%' AND state = 'idle in transaction'; -2: END; -3<: -3: SELECT is_session_in_group(pid, 'rg_move_query_mem_small') FROM pg_stat_activity WHERE query LIKE '%hold_memory_by_percent_on_qe%' AND state = 'idle in transaction'; -1: END; -1q: -2q: -3q: - --- test6: the destination group will wake up 'pg_resgroup_move_query' when a new slot become available -1: SET ROLE role_move_query; -1&: SELECT pg_sleep(5); -2: SET ROLE role_move_query_mem_small; -2&: SELECT pg_sleep(10); -3&: SELECT pg_resgroup_move_query(pid, 'rg_move_query') FROM pg_stat_activity WHERE query LIKE '%pg_sleep(10)%' AND rsgname='rg_move_query_mem_small'; -1<: --- connection 1 finished, it will wake up connection 3 -3<: -3: SELECT rsgname, query FROM pg_stat_activity WHERE state = 'active' and query like 'SELECT%'; -2<: -1q: -2q: -3q: - --- test7: the destination group memory_limit is 0, meaning use the global shared memory -1: ALTER RESOURCE GROUP rg_move_query SET memory_limit 0; -1: SET ROLE role_move_query_mem_small; -1: BEGIN; -1: SELECT hold_memory_by_percent_on_qe(1,0.1); -2: SELECT pg_resgroup_move_query(pid, 'rg_move_query') FROM pg_stat_activity WHERE query LIKE '%hold_memory_by_percent_on_qe%' AND rsgname='rg_move_query_mem_small'; -2: SELECT is_session_in_group(pid, 'rg_move_query') FROM pg_stat_activity WHERE query LIKE '%hold_memory_by_percent_on_qe%' AND state = 'idle in transaction'; -1q: -2q: - DROP ROLE role_move_query; DROP RESOURCE GROUP rg_move_query; -DROP ROLE role_move_query_mem_small; -DROP RESOURCE GROUP rg_move_query_mem_small; diff --git a/src/test/isolation2/isolation2_resgroup_schedule b/src/test/isolation2/isolation2_resgroup_schedule index 5a346d23965..b621d201bc3 100644 --- a/src/test/isolation2/isolation2_resgroup_schedule +++ b/src/test/isolation2/isolation2_resgroup_schedule @@ -11,38 +11,19 @@ test: resgroup/resgroup_name_convention test: resgroup/resgroup_assign_slot_fail 
test: resgroup/resgroup_unassign_entrydb test: resgroup/resgroup_seg_down_2pc -test: resgroup/resgroup_query_mem # functions test: resgroup/resgroup_concurrency test: resgroup/resgroup_bypass -test: resgroup/resgroup_bypass_memory_limit -test: resgroup/resgroup_alter_concurrency -test: resgroup/resgroup_memory_statistic -test: resgroup/resgroup_memory_limit -test: resgroup/resgroup_memory_runaway -test: resgroup/resgroup_alter_memory +#test: resgroup/resgroup_alter_concurrency test: resgroup/resgroup_cpu_rate_limit -test: resgroup/resgroup_alter_memory_spill_ratio test: resgroup/resgroup_cpuset test: resgroup/resgroup_cpuset_empty_default -test: resgroup/resgroup_set_memory_spill_ratio -test: resgroup/resgroup_unlimit_memory_spill_ratio test: resgroup/resgroup_cancel_terminate_concurrency test: resgroup/resgroup_move_query -# memory spill tests -#test: resgroup/resgroup_memory_hashagg_spill -#test: resgroup/resgroup_memory_hashjoin_spill -#test: resgroup/resgroup_memory_materialize_spill -#test: resgroup/resgroup_memory_sisc_mat_sort -#test: resgroup/resgroup_memory_sisc_sort_spill -#test: resgroup/resgroup_memory_sort_spill -#test: resgroup/resgroup_memory_spilltodisk - # regression tests test: resgroup/resgroup_recreate -test: resgroup/resgroup_operator_memory test: resgroup/resgroup_functions # parallel tests diff --git a/src/test/isolation2/output/resgroup/disable_resgroup.source b/src/test/isolation2/output/resgroup/disable_resgroup.source index 332bafc0913..3af30f4e255 100644 --- a/src/test/isolation2/output/resgroup/disable_resgroup.source +++ b/src/test/isolation2/output/resgroup/disable_resgroup.source @@ -26,17 +26,5 @@ SHOW gp_resource_manager; -- reset settings ALTER RESOURCE GROUP admin_group SET concurrency 10; ALTER -ALTER RESOURCE GROUP admin_group SET memory_spill_ratio 0; -ALTER -ALTER RESOURCE GROUP admin_group SET memory_limit 10; -ALTER -ALTER RESOURCE GROUP admin_group SET memory_shared_quota 80; -ALTER ALTER RESOURCE GROUP default_group SET 
concurrency 20; ALTER -ALTER RESOURCE GROUP default_group SET memory_spill_ratio 0; -ALTER -ALTER RESOURCE GROUP default_group SET memory_limit 0; -ALTER -ALTER RESOURCE GROUP default_group SET memory_shared_quota 80; -ALTER diff --git a/src/test/isolation2/output/resgroup/enable_resgroup.source b/src/test/isolation2/output/resgroup/enable_resgroup.source index 1553ebe6419..0bb74cfc74a 100644 --- a/src/test/isolation2/output/resgroup/enable_resgroup.source +++ b/src/test/isolation2/output/resgroup/enable_resgroup.source @@ -16,23 +16,6 @@ CREATE -- end_ignore --- we want to simulate a 3-segment (both master and primary) cluster with 2GB --- memory and gp_resource_group_memory_limit=100%, suppose: --- --- - total: the total memory on the system; --- - nsegs: the max per-host segment count (including both master and primaries); --- - limit: the gp_resource_group_memory_limit used for the simulation; --- --- then we have: total * limit / nsegs = 2GB * 1.0 / 3 --- so: limit = 2GB * 1.0 / 3 * nsegs / total --- --- with the simulation each primary segment should manage 682MB memory. -DO LANGUAGE plpython3u $$ import os import psutil -mem = psutil.virtual_memory().total swap = psutil.swap_memory().total overcommit = int(open('/proc/sys/vm/overcommit_ratio').readline()) total = swap + mem * overcommit / 100. -nsegs = int(plpy.execute(''' SELECT count(hostname) as nsegs FROM gp_segment_configuration WHERE preferred_role = 'p' GROUP BY hostname ORDER BY count(hostname) DESC LIMIT 1 ''')[0]['nsegs']) -limit = (2 << 30) * 1.0 * nsegs / 3 / total os.system('gpconfig -c gp_resource_group_memory_limit -v {:f}'.format(limit)) $$; -DO - -- enable resource group and restart cluster. -- start_ignore ! 
gpconfig -c gp_resource_manager -v group; @@ -71,11 +54,11 @@ DO -- verify the default settings 0: SELECT * from gp_toolkit.gp_resgroup_config; - groupid | groupname | concurrency | cpu_rate_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ----------+---------------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- - 6437 | default_group | 20 | 20 | 0 | 80 | 0 | vmtracker | -1 - 6438 | admin_group | 10 | 10 | 10 | 80 | 0 | vmtracker | -1 - 6441 | system_group | 0 | 10 | 0 | 0 | 0 | vmtracker | -1 + groupid | groupname | concurrency | cpu_hard_quota_limit | cpu_soft_priority | cpuset +---------+---------------+-------------+----------------------+-------------------+-------- + 6437 | default_group | 20 | 20 | 100 | -1 + 6438 | admin_group | 10 | 10 | 100 | -1 + 6441 | system_group | 0 | 10 | 100 | -1 (3 rows) -- by default admin_group has concurrency set to -1 which leads to @@ -83,18 +66,3 @@ DO 0: ALTER RESOURCE GROUP admin_group SET concurrency 2; ALTER --- explicitly set memory settings -0: ALTER RESOURCE GROUP admin_group SET memory_limit 10; -ALTER -0: ALTER RESOURCE GROUP default_group SET memory_limit 30; -ALTER -0: ALTER RESOURCE GROUP admin_group SET memory_shared_quota 80; -ALTER -0: ALTER RESOURCE GROUP default_group SET memory_shared_quota 80; -ALTER --- in later cases we will SHOW memory_spill_ratio as first command --- to verify that it can be correctly loaded even for bypassed commands -0: ALTER RESOURCE GROUP admin_group SET memory_spill_ratio 10; -ALTER -0: ALTER RESOURCE GROUP default_group SET memory_spill_ratio 10; -ALTER diff --git a/src/test/isolation2/output/resgroup/resgroup_alter_memory.source b/src/test/isolation2/output/resgroup/resgroup_alter_memory.source index 2a84c970a5a..d82f6e1acb6 100644 --- a/src/test/isolation2/output/resgroup/resgroup_alter_memory.source +++ b/src/test/isolation2/output/resgroup/resgroup_alter_memory.source 
@@ -28,7 +28,7 @@ CREATE CREATE OR REPLACE VIEW rg_mem_status AS SELECT groupname, memory_limit, memory_shared_quota FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg1_memory_test' OR groupname='rg2_memory_test' ORDER BY groupid; CREATE -CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_rate_limit=10, memory_limit=60, memory_shared_quota=0, memory_spill_ratio=5); +CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=60, memory_shared_quota=0, memory_spill_ratio=5); CREATE CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; CREATE @@ -321,7 +321,7 @@ ALTER ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 0; ALTER -CREATE RESOURCE GROUP rg2_memory_test WITH (concurrency=3, cpu_rate_limit=10, memory_limit=30, memory_shared_quota=0, memory_spill_ratio=5); +CREATE RESOURCE GROUP rg2_memory_test WITH (concurrency=3, cpu_hard_quota_limit=10, memory_limit=30, memory_shared_quota=0, memory_spill_ratio=5); CREATE CREATE ROLE role2_memory_test RESOURCE GROUP rg2_memory_test; CREATE @@ -869,7 +869,7 @@ DROP -- -- Test PrepareTransaction report an error -- -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=5, memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=5, memory_limit=5); CREATE CREATE ROLE rg_test_role RESOURCE GROUP rg_test_group; CREATE diff --git a/src/test/isolation2/output/resgroup/resgroup_bypass.source b/src/test/isolation2/output/resgroup/resgroup_bypass.source index 0dfb9b00ab4..a373c66d65d 100644 --- a/src/test/isolation2/output/resgroup/resgroup_bypass.source +++ b/src/test/isolation2/output/resgroup/resgroup_bypass.source @@ -1,383 +1,2 @@ -DROP ROLE IF EXISTS role_bypass_test; -DROP --- start_ignore -DROP RESOURCE GROUP rg_bypass_test; -DROP --- end_ignore - --- --- setup --- - -CREATE RESOURCE GROUP rg_bypass_test WITH (concurrency=2, cpu_rate_limit=20, memory_limit=20, memory_shared_quota=50); -CREATE -CREATE ROLE role_bypass_test 
RESOURCE GROUP rg_bypass_test; -CREATE - -CREATE OR REPLACE FUNCTION repeatPalloc(int, int) RETURNS int AS '@abs_builddir@/../regress/regress@DLSUFFIX@', 'repeatPalloc' LANGUAGE C READS SQL DATA; -CREATE - -CREATE OR REPLACE FUNCTION hold_memory(int, int) RETURNS int AS $$ SELECT * FROM repeatPalloc(1, $2) $$ LANGUAGE sql; -CREATE - -CREATE OR REPLACE VIEW eat_memory_on_qd_small AS SELECT hold_memory(0,12); -CREATE - -CREATE OR REPLACE VIEW eat_memory_on_qd_large AS SELECT hold_memory(0,100); -CREATE - -CREATE OR REPLACE VIEW eat_memory_on_one_slice AS SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory(t1.dbid,4)=0 ; -CREATE - -CREATE OR REPLACE VIEW eat_memory_on_slices AS SELECT count(null) FROM gp_dist_random('gp_id') t1, gp_dist_random('gp_id') t2 WHERE hold_memory(t1.dbid,4)=0 AND hold_memory(t2.dbid,4)=0 ; -CREATE - -CREATE OR REPLACE FUNCTION round_test(float, integer) RETURNS float AS $$ SELECT round($1 / $2) * $2 $$ LANGUAGE sql; -CREATE - -CREATE OR REPLACE VIEW memory_result AS SELECT rsgname, ismaster, round_test(avg(memory_usage), 1) AS avg_mem FROM( SELECT rsgname, CASE (j->'key')::text WHEN '"-1"'::text THEN 1 ELSE 0 END AS ismaster, ((j->'value')->>'used')::int AS memory_usage FROM( SELECT rsgname, row_to_json(json_each(memory_usage::json)) AS j FROM gp_toolkit.gp_resgroup_status WHERE rsgname='rg_bypass_test' )a )b GROUP BY (rsgname, ismaster) ORDER BY rsgname, ismaster; -CREATE - -GRANT ALL ON eat_memory_on_qd_small TO role_bypass_test; -GRANT -GRANT ALL ON eat_memory_on_qd_large TO role_bypass_test; -GRANT -GRANT ALL ON eat_memory_on_one_slice TO role_bypass_test; -GRANT -GRANT ALL ON eat_memory_on_slices TO role_bypass_test; -GRANT -GRANT ALL ON memory_result TO role_bypass_test; -GRANT - --- --- SET command should be bypassed --- - -ALTER RESOURCE GROUP rg_bypass_test SET concurrency 0; -ALTER -61: SET ROLE role_bypass_test; -SET -61&: SELECT 1; -ALTER RESOURCE GROUP rg_bypass_test set concurrency 1; -ALTER -61<: <... 
completed> - ?column? ----------- - 1 -(1 row) -ALTER RESOURCE GROUP rg_bypass_test set concurrency 0; -ALTER -61: SET enable_hashagg to on; -SET -61: SHOW enable_hashagg; - enable_hashagg ----------------- - on -(1 row) -61: invalid_syntax; -ERROR: syntax error at or near "invalid_syntax" -LINE 1: invalid_syntax; - ^ -61q: ... - --- --- gp_resource_group_bypass --- - -ALTER RESOURCE GROUP rg_bypass_test SET concurrency 0; -ALTER -61: SET ROLE role_bypass_test; -SET -61: SET gp_resource_group_bypass to on; -SET -61: SHOW gp_resource_group_bypass; - gp_resource_group_bypass --------------------------- - on -(1 row) -61: CREATE TABLE table_bypass_test (c1 int); -CREATE -61: INSERT INTO table_bypass_test SELECT generate_series(1,100); -INSERT 100 -61: SELECT count(*) FROM table_bypass_test; - count -------- - 100 -(1 row) -61: DROP TABLE table_bypass_test; -DROP -61: SET gp_resource_group_bypass to off; -SET -61: SHOW gp_resource_group_bypass; - gp_resource_group_bypass --------------------------- - off -(1 row) -61q: ... - --- --- gp_resource_group_bypass is not allowed inside a transaction block --- - -61: BEGIN; -BEGIN -61: SET gp_resource_group_bypass to on; -ERROR: SET gp_resource_group_bypass cannot run inside a transaction block -61: ABORT; -ABORT -61q: ... 
- --- --- gp_resource_group_bypass is not allowed inside a function --- - -DROP FUNCTION IF EXISTS func_resgroup_bypass_test(int); -DROP -CREATE FUNCTION func_resgroup_bypass_test(c1 int) RETURNS INT AS $$ SET gp_resource_group_bypass TO ON; /* inside a function */ SELECT 1 $$ LANGUAGE SQL; -CREATE -SELECT func_resgroup_bypass_test(1); -ERROR: SET gp_resource_group_bypass cannot run inside a transaction block -CONTEXT: SQL function "func_resgroup_bypass_test" statement 1 -DROP FUNCTION func_resgroup_bypass_test(int); -DROP - - --- --- memory limit in bypass mode, on qd --- --- orca will allocate 10M memory error buffer before optimization, and release --- it after that, so if optimizer is set to on, it will fail when the memory --- usage reaches 24M - -61: SET ROLE role_bypass_test; -SET -61: SET gp_resource_group_bypass to on; -SET -61: BEGIN; -BEGIN -61: SELECT * FROM eat_memory_on_qd_small; - hold_memory -------------- - 0 -(1 row) -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 0 - rg_bypass_test | 1 | 12 -(2 rows) -61: SELECT * FROM eat_memory_on_qd_large; -ERROR: Out of memory -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory" statement 1 -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 0 - rg_bypass_test | 1 | 0 -(2 rows) -61: ABORT; -ABORT -61: BEGIN; -BEGIN -SELECT 1 FROM memory_result where avg_mem > 10 and ismaster = 1; - ?column? ----------- - 1 -(1 row) -61q: ... 
- --- --- memory limit in bypass mode, on one slice --- - -61: SET ROLE role_bypass_test; -SET -61: SET gp_resource_group_bypass to on; -SET -61: BEGIN; -BEGIN -61: SELECT * FROM eat_memory_on_one_slice; - count -------- - 0 -(1 row) -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 4 - rg_bypass_test | 1 | 0 -(2 rows) -61: SELECT * FROM eat_memory_on_one_slice; - count -------- - 0 -(1 row) -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 8 - rg_bypass_test | 1 | 0 -(2 rows) -61: SELECT * FROM eat_memory_on_one_slice; -ERROR: Out of memory (seg0 slice1 127.0.0.1:25432 pid=336) -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory" statement 1 -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 0 - rg_bypass_test | 1 | 0 -(2 rows) -61: ABORT; -ABORT -61: BEGIN; -BEGIN -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 0 - rg_bypass_test | 1 | 0 -(2 rows) -61q: ... 
- --- --- memory limit in bypass mode, on slices --- - -61: SET ROLE role_bypass_test; -SET -61: SET gp_resource_group_bypass to on; -SET -61: BEGIN; -BEGIN -61: SELECT * FROM eat_memory_on_slices; - count -------- - 0 -(1 row) -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 4 - rg_bypass_test | 1 | 0 -(2 rows) -61: SELECT * FROM eat_memory_on_slices; - count -------- - 0 -(1 row) -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 8 - rg_bypass_test | 1 | 0 -(2 rows) -61: SELECT * FROM eat_memory_on_slices; -ERROR: Out of memory (seg0 slice2 127.0.0.1:25432 pid=354) -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory" statement 1 -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 0 - rg_bypass_test | 1 | 0 -(2 rows) -61: ABORT; -ABORT -61: BEGIN; -BEGIN -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem -----------------+----------+--------- - rg_bypass_test | 0 | 0 - rg_bypass_test | 1 | 0 -(2 rows) -61q: ... - --- --- gp_resgroup_status.num_running is updated in bypass mode --- - -61: SET ROLE role_bypass_test; -SET -61: SET gp_resource_group_bypass to on; -SET -61&: SELECT pg_sleep(10); -62: SET ROLE role_bypass_test; -SET -62: SET gp_resource_group_bypass to on; -SET -62&: SELECT pg_sleep(20); -SELECT num_running FROM gp_toolkit.gp_resgroup_status WHERE rsgname='rg_bypass_test'; - num_running -------------- - 2 -(1 row) -SELECT pg_cancel_backend(pid) FROM pg_stat_activity WHERE rsgname='rg_bypass_test'; - pg_cancel_backend -------------------- - t - t -(2 rows) -61<: <... completed> -ERROR: canceling statement due to user request -62<: <... completed> -ERROR: canceling statement due to user request -61q: ... -62q: ... 
- --- --- pg_stat_activity is updated in bypass mode --- - -61: SET ROLE role_bypass_test; -SET -61: SET gp_resource_group_bypass to on; -SET -61&: SELECT pg_sleep(10); -62: SET ROLE role_bypass_test; -SET -62: SET gp_resource_group_bypass to on; -SET -62&: SELECT pg_sleep(20); -SELECT query FROM pg_stat_activity WHERE rsgname='rg_bypass_test'; - query ----------------------- - SELECT pg_sleep(10); - SELECT pg_sleep(20); -(2 rows) -SELECT pg_cancel_backend(pid) FROM pg_stat_activity WHERE rsgname='rg_bypass_test'; - pg_cancel_backend -------------------- - t - t -(2 rows) -61<: <... completed> -ERROR: canceling statement due to user request -62<: <... completed> -ERROR: canceling statement due to user request -61q: ... -62q: ... - --- --- cleanup --- - -REVOKE ALL ON eat_memory_on_qd_small FROM role_bypass_test; -REVOKE -REVOKE ALL ON eat_memory_on_qd_large FROM role_bypass_test; -REVOKE -REVOKE ALL ON eat_memory_on_one_slice FROM role_bypass_test; -REVOKE -REVOKE ALL ON eat_memory_on_slices FROM role_bypass_test; -REVOKE -REVOKE ALL ON memory_result FROM role_bypass_test; -REVOKE - -DROP ROLE role_bypass_test; -DROP -DROP RESOURCE GROUP rg_bypass_test; -DROP - --- vi:filetype=sql: +-- RG FIXME: The bypass mode will be re-design in the latest future, so just remove all the test case, because this +-- file is deeply dependent on the memory model which has been removed in this PR. 
diff --git a/src/test/isolation2/output/resgroup/resgroup_cpu_rate_limit.source b/src/test/isolation2/output/resgroup/resgroup_cpu_rate_limit.source index 852ad5953a7..70213d398e7 100644 --- a/src/test/isolation2/output/resgroup/resgroup_cpu_rate_limit.source +++ b/src/test/isolation2/output/resgroup/resgroup_cpu_rate_limit.source @@ -49,32 +49,32 @@ CREATE VIEW cancel_all AS SELECT pg_cancel_backend(pid) FROM pg_stat_activity WH CREATE -- create two resource groups -CREATE RESOURCE GROUP rg1_cpu_test WITH (concurrency=5, cpu_rate_limit=10, memory_limit=20); +CREATE RESOURCE GROUP rg1_cpu_test WITH (concurrency=5, cpu_hard_quota_limit=-1, cpu_soft_priority=100); CREATE -CREATE RESOURCE GROUP rg2_cpu_test WITH (concurrency=5, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg2_cpu_test WITH (concurrency=5, cpu_hard_quota_limit=-1, cpu_soft_priority=200); CREATE -- -- check gpdb cgroup configuration -- -DO LANGUAGE PLPYTHON3U $$ import subprocess +DO LANGUAGE PLPYTHON3U $$ import subprocess import os cgroot = '@cgroup_mnt_point@' def get_cgroup_prop(prop): fullpath = cgroot + '/' + prop return int(open(fullpath).readline()) def run_command(cmd): return subprocess.check_output(cmd.split()).decode() def show_guc(guc): return plpy.execute('SHOW {}'.format(guc))[0][guc] # # check gpdb top-level cgroup configuration # # get top-level cgroup props cfs_quota_us = get_cgroup_prop('/cpu/gpdb/cpu.cfs_quota_us') cfs_period_us = get_cgroup_prop('/cpu/gpdb/cpu.cfs_period_us') shares = get_cgroup_prop('/cpu/gpdb/cpu.shares') -# get system props ncores = int(run_command('nproc')) +# get system props ncores = os.cpu_count() # get global gucs gp_resource_group_cpu_limit = float(show_guc('gp_resource_group_cpu_limit')) gp_resource_group_cpu_priority = int(show_guc('gp_resource_group_cpu_priority')) # cfs_quota_us := cfs_period_us * ncores * gp_resource_group_cpu_limit assert cfs_quota_us == cfs_period_us * ncores * gp_resource_group_cpu_limit # shares := 1024 * 
gp_resource_group_cpu_priority assert shares == 1024 * gp_resource_group_cpu_priority -# SUB/shares := TOP/shares * cpu_rate_limit def check_group_shares(name): cpu_rate_limit = int(plpy.execute(''' SELECT value FROM pg_resgroupcapability c, pg_resgroup g WHERE c.resgroupid=g.oid AND reslimittype=2 AND g.rsgname='{}' '''.format(name))[0]['value']) oid = int(plpy.execute(''' SELECT oid FROM pg_resgroup WHERE rsgname='{}' '''.format(name))[0]['oid']) sub_shares = get_cgroup_prop('/cpu/gpdb/{}/cpu.shares'.format(oid)) assert sub_shares == int(shares * cpu_rate_limit / 100) +def check_group_shares(name): cpu_soft_priority = int(plpy.execute(''' SELECT value FROM pg_resgroupcapability c, pg_resgroup g WHERE c.resgroupid=g.oid AND reslimittype=3 AND g.rsgname='{}' '''.format(name))[0]['value']) oid = int(plpy.execute(''' SELECT oid FROM pg_resgroup WHERE rsgname='{}' '''.format(name))[0]['oid']) sub_shares = get_cgroup_prop('/cpu/gpdb/{}/cpu.shares'.format(oid)) assert sub_shares == int(cpu_soft_priority * 1024 / 100) # check default groups check_group_shares('default_group') check_group_shares('admin_group') # check user groups check_group_shares('rg1_cpu_test') check_group_shares('rg2_cpu_test') $$; DO --- lower admin_group's cpu_rate_limit to minimize its side effect -ALTER RESOURCE GROUP admin_group SET cpu_rate_limit 1; +-- lower admin_group's cpu_hard_quota_limit to minimize its side effect +ALTER RESOURCE GROUP admin_group SET cpu_hard_quota_limit 1; ALTER -- create two roles and assign them to above groups @@ -116,16 +116,6 @@ SET -- on empty load the cpu usage shall be 0% -- --- --- a group should burst to use all the cpu usage --- when it's the only one with running queries. --- --- however the overall cpu usage is controlled by a GUC --- gp_resource_group_cpu_limit which is 90% by default. 
--- --- so the cpu usage shall be 90% --- - 10&: SELECT * FROM gp_dist_random('gp_id') WHERE busy() IS NULL; 11&: SELECT * FROM gp_dist_random('gp_id') WHERE busy() IS NULL; 12&: SELECT * FROM gp_dist_random('gp_id') WHERE busy() IS NULL; @@ -133,6 +123,7 @@ SET 14&: SELECT * FROM gp_dist_random('gp_id') WHERE busy() IS NULL; -- start_ignore +-- Gather CPU usage statistics into cpu_usage_samples TRUNCATE TABLE cpu_usage_samples; TRUNCATE SELECT fetch_sample(); @@ -287,9 +278,9 @@ SET -- -- when there are multiple groups with parallel queries, --- they should share the cpu usage by their cpu_usage settings, +-- they should share the cpu usage by their cpu_soft_priority settings, -- --- rg1_cpu_test:rg2_cpu_test is 0.1:0.2 => 1:2, so: +-- rg1_cpu_test:rg2_cpu_test is 100:200 => 1:2, so: -- -- - rg1_cpu_test gets 90% * 1/3 => 30%; -- - rg2_cpu_test gets 90% * 2/3 => 60%; @@ -477,38 +468,13 @@ ERROR: canceling statement due to user request 24q: ... -- end_ignore --- start_ignore -! gpconfig -c gp_resource_group_cpu_ceiling_enforcement -v on; -20210405:09:43:44:019995 gpconfig:hubert-gp-centos:huanzhang-[INFO]:-completed successfully with parameters '-c gp_resource_group_cpu_ceiling_enforcement -v on' - -! gpstop -rai; -20210405:09:43:44:020373 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Starting gpstop with args: -rai -20210405:09:43:44:020373 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Gathering information and validating the environment... -20210405:09:43:44:020373 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Obtaining Cloudberry Coordinator catalog information -20210405:09:43:44:020373 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Obtaining Segment details from coordinator... 
-20210405:09:43:44:020373 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Cloudberry Version: 'postgres (Cloudberry Database) 7.0.0-alpha.0+dev.14449.gd1235cef56 build dev' -20210405:09:43:44:020373 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Commencing Coordinator instance shutdown with mode='immediate' -20210405:09:43:44:020373 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Coordinator segment instance directory=/home/huanzhang/workspace/gpdb/gpAux/gpdemo/datadirs/qddir/demoDataDir-1 -20210405:09:43:44:020373 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Attempting forceful termination of any leftover coordinator process -20210405:09:43:44:020373 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Terminating processes for segment /home/huanzhang/workspace/gpdb/gpAux/gpdemo/datadirs/qddir/demoDataDir-1 -20210405:09:43:44:020373 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Stopping coordinator standby host hubert-gp-centos mode=immediate -20210405:09:43:46:020373 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Successfully shutdown standby process on hubert-gp-centos -20210405:09:43:46:020373 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Targeting dbid [2, 5, 3, 6, 4, 7] for shutdown -20210405:09:43:46:020373 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Commencing parallel primary segment instance shutdown, please wait... -20210405:09:43:46:020373 gpstop:hubert-gp-centos:huanzhang-[INFO]:-0.00% of jobs completed -20210405:09:43:47:020373 gpstop:hubert-gp-centos:huanzhang-[INFO]:-100.00% of jobs completed -20210405:09:43:47:020373 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Commencing parallel mirror segment instance shutdown, please wait... 
-20210405:09:43:47:020373 gpstop:hubert-gp-centos:huanzhang-[INFO]:-0.00% of jobs completed -20210405:09:43:48:020373 gpstop:hubert-gp-centos:huanzhang-[INFO]:-100.00% of jobs completed -20210405:09:43:48:020373 gpstop:hubert-gp-centos:huanzhang-[INFO]:----------------------------------------------------- -20210405:09:43:48:020373 gpstop:hubert-gp-centos:huanzhang-[INFO]:- Segments stopped successfully = 6 -20210405:09:43:48:020373 gpstop:hubert-gp-centos:huanzhang-[INFO]:- Segments with errors during stop = 0 -20210405:09:43:48:020373 gpstop:hubert-gp-centos:huanzhang-[INFO]:----------------------------------------------------- -20210405:09:43:48:020373 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Successfully shutdown 6 of 6 segment instances -20210405:09:43:48:020373 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Database successfully shutdown with no errors reported -20210405:09:43:48:020373 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Restarting System... --- end_ignore + +-- Test hard quota limit +ALTER RESOURCE GROUP rg1_cpu_test set cpu_hard_quota_limit 10; +ALTER +ALTER RESOURCE GROUP rg2_cpu_test set cpu_hard_quota_limit 20; +ALTER -- prepare parallel queries in the two groups 10: SET ROLE TO role1_cpu_test; @@ -658,6 +624,7 @@ TRUNCATE (1 row) -- end_ignore +-- verify it 1:SELECT verify_cpu_usage('rg1_cpu_test', 10, 2); verify_cpu_usage ------------------ @@ -706,9 +673,9 @@ SET -- -- when there are multiple groups with parallel queries, --- they should follow the ceiling enforcement of the cpu usage. +-- they should follow the enforcement of the cpu usage. -- --- rg1_cpu_test:rg2_cpu_test is 0.1:0.2, so: +-- rg1_cpu_test:rg2_cpu_test is 10:20, so: -- -- - rg1_cpu_test gets 10%; -- - rg2_cpu_test gets 20%; @@ -898,41 +865,8 @@ ERROR: canceling statement due to user request 1q: ... -- end_ignore --- start_ignore -! 
gpconfig -c gp_resource_group_cpu_ceiling_enforcement -v off; -20210405:09:44:48:022326 gpconfig:hubert-gp-centos:huanzhang-[INFO]:-completed successfully with parameters '-c gp_resource_group_cpu_ceiling_enforcement -v off' - -! gpstop -rai; -20210405:09:44:48:022698 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Starting gpstop with args: -rai -20210405:09:44:48:022698 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Gathering information and validating the environment... -20210405:09:44:48:022698 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Obtaining Cloudberry Coordinator catalog information -20210405:09:44:48:022698 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Obtaining Segment details from coordinator... -20210405:09:44:48:022698 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Cloudberry Version: 'postgres (Cloudberry Database) 7.0.0-alpha.0+dev.14449.gd1235cef56 build dev' -20210405:09:44:48:022698 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Commencing Coordinator instance shutdown with mode='immediate' -20210405:09:44:48:022698 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Coordinator segment instance directory=/home/huanzhang/workspace/gpdb/gpAux/gpdemo/datadirs/qddir/demoDataDir-1 -20210405:09:44:48:022698 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Attempting forceful termination of any leftover coordinator process -20210405:09:44:48:022698 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Terminating processes for segment /home/huanzhang/workspace/gpdb/gpAux/gpdemo/datadirs/qddir/demoDataDir-1 -20210405:09:44:48:022698 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Stopping coordinator standby host hubert-gp-centos mode=immediate -20210405:09:44:49:022698 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Successfully shutdown standby process on hubert-gp-centos -20210405:09:44:49:022698 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Targeting dbid [2, 5, 3, 6, 4, 7] for shutdown -20210405:09:44:49:022698 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Commencing parallel primary segment instance shutdown, please 
wait... -20210405:09:44:49:022698 gpstop:hubert-gp-centos:huanzhang-[INFO]:-0.00% of jobs completed -20210405:09:44:50:022698 gpstop:hubert-gp-centos:huanzhang-[INFO]:-100.00% of jobs completed -20210405:09:44:50:022698 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Commencing parallel mirror segment instance shutdown, please wait... -20210405:09:44:50:022698 gpstop:hubert-gp-centos:huanzhang-[INFO]:-0.00% of jobs completed -20210405:09:44:51:022698 gpstop:hubert-gp-centos:huanzhang-[INFO]:-100.00% of jobs completed -20210405:09:44:51:022698 gpstop:hubert-gp-centos:huanzhang-[INFO]:----------------------------------------------------- -20210405:09:44:51:022698 gpstop:hubert-gp-centos:huanzhang-[INFO]:- Segments stopped successfully = 6 -20210405:09:44:51:022698 gpstop:hubert-gp-centos:huanzhang-[INFO]:- Segments with errors during stop = 0 -20210405:09:44:51:022698 gpstop:hubert-gp-centos:huanzhang-[INFO]:----------------------------------------------------- -20210405:09:44:51:022698 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Successfully shutdown 6 of 6 segment instances -20210405:09:44:51:022698 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Database successfully shutdown with no errors reported -20210405:09:44:51:022698 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Restarting System... 
- --- end_ignore - --- restore admin_group's cpu_rate_limit -2:ALTER RESOURCE GROUP admin_group SET cpu_rate_limit 10; +-- restore admin_group's cpu_hard_quota_limit +2:ALTER RESOURCE GROUP admin_group SET cpu_hard_quota_limit 10; ALTER -- cleanup diff --git a/src/test/isolation2/output/resgroup/resgroup_cpuset.source b/src/test/isolation2/output/resgroup/resgroup_cpuset.source index 38eb29fc0c5..6c857905553 100644 --- a/src/test/isolation2/output/resgroup/resgroup_cpuset.source +++ b/src/test/isolation2/output/resgroup/resgroup_cpuset.source @@ -1,25 +1,22 @@ - -CREATE RESOURCE GROUP rg1_cpuset_test WITH (memory_limit = 10, cpuset='0'); -CREATE -CREATE ROLE role1_cpuset_test RESOURCE GROUP rg1_cpuset_test; -CREATE - --- check whether the queries running in the specific group on the specific core set --- @param grp: the resource group queries running in +-- check whether the queries running on the specific core set +-- @param grp: the resource group name queries running in -- @param cpuset: cpu cores which the queries should only be run on them, e.g. 
0,1 -- @return bool: true/false indicating whether it corresponds to the rule CREATE FUNCTION check_cpuset(grp TEXT, cpuset TEXT) RETURNS BOOL AS $$ import subprocess import pg import time import re conn = pg.connect(dbname="isolation2resgrouptest") pt = re.compile(r'con(\d+)') -def check(expect_cpus, sess_ids): procs = subprocess.check_output(['ps', '-eF']).decode().split('\n') head, proc_stats = procs[0], procs[1:] PSR = [id for id, attr in enumerate(head.split()) if attr.strip() == 'PSR'][0] cpus = [proc_stat.split()[PSR].strip() for proc_stat in proc_stats if 'postgres' in proc_stat and pt.findall(proc_stat) and set(pt.findall(proc_stat)).issubset(sess_ids)] return set(cpus).issubset(set(expect_cpus)) +def check(expect_cpus, sess_ids): # use ps -eF to find all processes which belongs to postgres and in the given sessions +procs = subprocess.check_output(['ps', '-eF']).decode().split('\n') head, proc_stats = procs[0], procs[1:] PSR = [id for id, attr in enumerate(head.split()) if attr.strip() == 'PSR'][0] cpus = [proc_stat.split()[PSR].strip() for proc_stat in proc_stats if 'postgres' in proc_stat and pt.findall(proc_stat) and sess_ids.issubset(set(pt.findall(proc_stat)))] return set(cpus).issubset(set(expect_cpus)) def get_all_sess_ids_in_group(group_name): sql = "select sess_id from pg_stat_activity where rsgname = '%s'" % group_name result = conn.query(sql).getresult() return set([str(r[0]) for r in result]) -expect_cpu = [] conf = cpuset if conf == '': fd = open("@cgroup_mnt_point@/cpuset/gpdb/cpuset.cpus") line = fd.readline() fd.close() conf = line.strip('\n') -tokens = conf.split(",") for token in tokens: if token.find('-') != -1: interval = token.split("-") num1 = interval[0] num2 = interval[1] for num in range(int(num1), int(num2) + 1): expect_cpu.append(str(num)) else: expect_cpu.append(token) sess_ids = get_all_sess_ids_in_group(grp) +conf = cpuset if conf == '': fd = open("@cgroup_mnt_point@/cpuset/gpdb/cpuset.cpus") line = fd.readline() fd.close() 
conf = line.strip('\n') +tokens = conf.split(",") +expect_cpu = [] +for token in tokens: if token.find('-') != -1: interval = token.split("-") num1 = interval[0] num2 = interval[1] for num in range(int(num1), int(num2) + 1): expect_cpu.append(str(num)) else: expect_cpu.append(token) sess_ids = get_all_sess_ids_in_group(grp) for i in range(1000): time.sleep(0.01) if not check(expect_cpu, sess_ids): return False return True $$ LANGUAGE plpython3u; CREATE -- create a resource group that contains all the cpu cores -CREATE FUNCTION create_allcores_group(grp TEXT) RETURNS BOOL AS $$ import pg conn = pg.connect(dbname="isolation2resgrouptest") file = "@cgroup_mnt_point@/cpuset/gpdb/cpuset.cpus" fd = open(file) line = fd.readline() fd.close() line = line.strip('\n') sql = "create resource group " + grp + " with (" \ + "memory_limit=1, cpuset='" + line + "')" result = conn.query(sql) +CREATE FUNCTION create_allcores_group(grp TEXT) RETURNS BOOL AS $$ import pg conn = pg.connect(dbname="isolation2resgrouptest") file = "@cgroup_mnt_point@/cpuset/gpdb/cpuset.cpus" fd = open(file) line = fd.readline() fd.close() line = line.strip('\n') sql = "create resource group " + grp + " with (" \ + "cpuset='" + line + "')" result = conn.query(sql) file = "@cgroup_mnt_point@/cpuset/gpdb/1/cpuset.cpus" fd = open(file) line = fd.readline() fd.close() line = line.strip('\n') if line != "0": return False return True $$ LANGUAGE plpython3u; CREATE @@ -29,7 +26,9 @@ CREATE FUNCTION check_rules() RETURNS BOOL AS $$ import pg def get_all_group_which_cpuset_is_set(): sql = "select groupid,cpuset from gp_toolkit.gp_resgroup_config where cpuset != '-1'" result = conn.query(sql).getresult() return result def parse_cpuset(line): line = line.strip('\n') if len(line) == 0: return set([]) tokens = line.split(",") cpuset = [] for token in tokens: if token.find('-') != -1: interval = token.split("-") num1 = interval[0] num2 = interval[1] for num in range(int(num1), int(num2) + 1): cpuset.append(str(num)) 
else: cpuset.append(token) return set(cpuset) def get_cgroup_cpuset(group): group = str(group) if group == '0': file = "@cgroup_mnt_point@/cpuset/gpdb/cpuset.cpus" else: file = "@cgroup_mnt_point@/cpuset/gpdb/" + group + "/cpuset.cpus" fd = open(file) line = fd.readline() fd.close() return parse_cpuset(line) -conn = pg.connect(dbname="isolation2resgrouptest") config_groups = get_all_group_which_cpuset_is_set() groups_cpuset = set([]) # check whether cpuset in config and cgroup are same, and have no overlap for config_group in config_groups: groupid = config_group[0] cpuset_value = config_group[1] config_cpuset = parse_cpuset(cpuset_value) cgroup_cpuset = get_cgroup_cpuset(groupid) if len(groups_cpuset & cgroup_cpuset) > 0: return False groups_cpuset |= cgroup_cpuset if not(config_cpuset.issubset(cgroup_cpuset) and cgroup_cpuset.issubset(config_cpuset)): return False # check whether cpuset in resource group union default group is universal set default_cpuset = get_cgroup_cpuset(1) all_cpuset = get_cgroup_cpuset(0) if not (default_cpuset | groups_cpuset).issubset(all_cpuset): return False if not all_cpuset.issubset(default_cpuset | groups_cpuset): return False # if all the cores are allocated to resource group, default group must has a core left if len(default_cpuset & groups_cpuset) > 0 and \ (len(default_cpuset) != 1 or \ (not default_cpuset.issubset(all_cpuset))): return False +conn = pg.connect(dbname="isolation2resgrouptest") config_groups = get_all_group_which_cpuset_is_set() groups_cpuset = set([]) +# check whether cpuset in config and cgroup are same, and have no overlap for config_group in config_groups: groupid = config_group[0] cpuset_value = config_group[1] config_cpuset = parse_cpuset(cpuset_value) cgroup_cpuset = get_cgroup_cpuset(groupid) if len(groups_cpuset & cgroup_cpuset) > 0: return False groups_cpuset |= cgroup_cpuset if not(config_cpuset.issubset(cgroup_cpuset) and cgroup_cpuset.issubset(config_cpuset)): return False +# check whether cpuset in 
resource group union default group is universal set default_cpuset = get_cgroup_cpuset(1) all_cpuset = get_cgroup_cpuset(0) if not (default_cpuset | groups_cpuset).issubset(all_cpuset): return False if not all_cpuset.issubset(default_cpuset | groups_cpuset): return False # if all the cores are allocated to resource group, default group must has a core left if len(default_cpuset & groups_cpuset) > 0 and \ (len(default_cpuset) != 1 or \ (not default_cpuset.issubset(all_cpuset))): return False return True $$ LANGUAGE plpython3u; CREATE @@ -42,6 +41,11 @@ CREATE CREATE VIEW cancel_all AS SELECT pg_cancel_backend(pid) FROM pg_stat_activity WHERE query LIKE 'SELECT * FROM busy%'; CREATE +CREATE RESOURCE GROUP rg1_cpuset_test WITH (cpuset='0'); +CREATE +CREATE ROLE role1_cpuset_test RESOURCE GROUP rg1_cpuset_test; +CREATE + GRANT ALL ON busy TO role1_cpuset_test; GRANT @@ -97,8 +101,8 @@ select pg_sleep(2); 11: END; END --- change to cpu_rate_limit while the transaction is running -ALTER RESOURCE GROUP rg1_cpuset_test SET cpu_rate_limit 10; +-- change to cpu_hard_quota_limit while the transaction is running +ALTER RESOURCE GROUP rg1_cpuset_test SET cpu_hard_quota_limit 70; ALTER -- cancel the transaction @@ -115,7 +119,7 @@ ERROR: canceling statement due to user request 11q: ... 
-- end_ignore --- test whether the cpu_rate_limit had taken effect +-- test whether the cpu_hard_quota_limit had taken effect 10: SET ROLE TO role1_cpuset_test; SET 10: BEGIN; @@ -192,21 +196,21 @@ SELECT check_rules(); ------------- t (1 row) -CREATE RESOURCE GROUP rg1_test_group WITH (memory_limit=10, cpuset='0'); +CREATE RESOURCE GROUP rg1_test_group WITH (cpuset='0'); CREATE SELECT check_rules(); check_rules ------------- t (1 row) -CREATE RESOURCE GROUP rg2_test_group WITH (memory_limit=10, cpuset='1'); +CREATE RESOURCE GROUP rg2_test_group WITH (cpuset='1'); CREATE SELECT check_rules(); check_rules ------------- t (1 row) -ALTER RESOURCE GROUP rg1_test_group SET cpu_rate_limit 1; +ALTER RESOURCE GROUP rg1_test_group SET cpu_hard_quota_limit 1; ALTER SELECT check_rules(); check_rules @@ -220,7 +224,7 @@ SELECT check_rules(); ------------- t (1 row) -ALTER RESOURCE GROUP rg1_test_group SET cpu_rate_limit 1; +ALTER RESOURCE GROUP rg1_test_group SET cpu_hard_quota_limit 1; ALTER SELECT check_rules(); check_rules @@ -267,7 +271,7 @@ SELECT gp_inject_fault('create_resource_group_fail', 'error', 1); ----------------- Success: (1 row) -CREATE RESOURCE GROUP rg1_test_group WITH (memory_limit=10, cpuset='0'); +CREATE RESOURCE GROUP rg1_test_group WITH (cpuset='0'); ERROR: fault triggered, fault name:'create_resource_group_fail' fault type:'error' SELECT groupid, groupname, cpuset FROM gp_toolkit.gp_resgroup_config WHERE cpuset != '-1'; groupid | groupname | cpuset @@ -283,6 +287,10 @@ SELECT gp_inject_fault('create_resource_group_fail', 'reset', 1); ----------------- Success: (1 row) +-- start_ignore +DROP RESOURCE GROUP rg1_test_group; +ERROR: resource group "rg1_test_group" does not exist +-- end_ignore -- test segment/master cpuset CREATE RESOURCE GROUP rg_multi_cpuset1 WITH (concurrency=2, cpuset='0;0'); diff --git a/src/test/isolation2/output/resgroup/resgroup_memory_limit.source b/src/test/isolation2/output/resgroup/resgroup_memory_limit.source index 
fc2c3f5b0ff..638c322eea0 100644 --- a/src/test/isolation2/output/resgroup/resgroup_memory_limit.source +++ b/src/test/isolation2/output/resgroup/resgroup_memory_limit.source @@ -32,7 +32,7 @@ CREATE -- rg1's shared quota: %52 - %52 => %0 -- system free chunks: 100% - 10% - 30% - 52% => 8% -- memory available to one slot in rg1: 52%/2 + 0% + 8% => 34% -CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_rate_limit=10, memory_limit=52, memory_shared_quota=0); +CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=52, memory_shared_quota=0); CREATE CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; CREATE @@ -130,7 +130,7 @@ DROP -- system free chunks: 100% - 10% - 30% - 52% => 8% -- memory available to one slot in rg1: 15.5% + 21% + 8% => 44.5% -CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_rate_limit=10, memory_limit=52, memory_shared_quota=40); +CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=52, memory_shared_quota=40); CREATE CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; CREATE @@ -242,7 +242,7 @@ DROP -- system free chunks: 100% - 10% - 30% - 52% => 8% -- memory available to one slot in rg1: 52% + 8% => 60% -CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_rate_limit=10, memory_limit=52, memory_shared_quota=100); +CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=52, memory_shared_quota=100); CREATE CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; CREATE @@ -340,7 +340,7 @@ DROP -- system free chunks: 100% - 10% - 30% - 52% => 8% -- memory available to one slot in rg1: 26% + 8% => 34% -CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_rate_limit=10, memory_limit=52, memory_shared_quota=0); +CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=52, memory_shared_quota=0); CREATE CREATE ROLE 
role1_memory_test RESOURCE GROUP rg1_memory_test; CREATE @@ -506,7 +506,7 @@ DROP -- system free chunks: 100% - 10% - 30% - 52% => 8% -- memory available to one slot in rg1: 13% + 26% + 8% => 47% -CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_rate_limit=10, memory_limit=52, memory_shared_quota=50); +CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=52, memory_shared_quota=50); CREATE CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; CREATE @@ -677,9 +677,9 @@ DROP -- system free chunks: 100% - 10% - 30% - 100%*20/100 - 100%*20/100 => 20% -- memory available to one slot in rg1/rg2: 6% + 8% + 20% => 34% -CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_rate_limit=10, memory_limit=20, memory_shared_quota=40); +CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=20, memory_shared_quota=40); CREATE -CREATE RESOURCE GROUP rg2_memory_test WITH (concurrency=2, cpu_rate_limit=10, memory_limit=20, memory_shared_quota=40); +CREATE RESOURCE GROUP rg2_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=20, memory_shared_quota=40); CREATE CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; CREATE @@ -850,9 +850,9 @@ DROP -- system free chunks: 100% - 10% - 30% - 30% - 30% => 0 -- memory available to one slot in rg1/rg2: 15% + 0 + 0 => 15% -CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_rate_limit=10, memory_limit=30, memory_shared_quota=0); +CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=30, memory_shared_quota=0); CREATE -CREATE RESOURCE GROUP rg2_memory_test WITH (concurrency=2, cpu_rate_limit=10, memory_limit=30, memory_shared_quota=0); +CREATE RESOURCE GROUP rg2_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=30, memory_shared_quota=0); CREATE CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; CREATE diff --git 
a/src/test/isolation2/output/resgroup/resgroup_memory_runaway.source b/src/test/isolation2/output/resgroup/resgroup_memory_runaway.source index 030654258ce..1fef409e1bb 100644 --- a/src/test/isolation2/output/resgroup/resgroup_memory_runaway.source +++ b/src/test/isolation2/output/resgroup/resgroup_memory_runaway.source @@ -67,7 +67,7 @@ CREATE -- rg1's shared quota: 20% - 20% => %0 -- system free chunks: 100% - 10% - 30% - 20% => 40% -- global area safe threshold: 40% / 2 = 20% -1: CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_rate_limit=10, memory_limit=20, memory_shared_quota=0); +1: CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=20, memory_shared_quota=0); CREATE 1: CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; CREATE @@ -126,7 +126,7 @@ DROP -- rg1's shared quota: %20 - %10 => %10 -- system free chunks: 100% - 10% - 30% - 20% => 40% -- safe threshold: 40% / 2 = 20% -1: CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_rate_limit=10, memory_limit=20, memory_shared_quota=50); +1: CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=20, memory_shared_quota=50); CREATE 1: CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; CREATE @@ -197,9 +197,9 @@ DROP -- rg2's expected: 100% * 20% => 20% -- system free chunks: 100% - 10% - 30% - 20% - 20%=> 20% -- safe threshold: 20% / 2 = 10% -1: CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_rate_limit=10, memory_limit=20, memory_shared_quota=50); +1: CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=20, memory_shared_quota=50); CREATE -1: CREATE RESOURCE GROUP rg2_memory_test WITH (concurrency=2, cpu_rate_limit=10, memory_limit=20, memory_shared_quota=0); +1: CREATE RESOURCE GROUP rg2_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=20, memory_shared_quota=0); CREATE 1: CREATE ROLE 
role1_memory_test RESOURCE GROUP rg1_memory_test; CREATE @@ -264,7 +264,7 @@ DROP ! gpstop -ari; -- end_ignore -1: CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_rate_limit=10, memory_limit=60, memory_shared_quota=50); +1: CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=60, memory_shared_quota=50); CREATE 1: CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; CREATE diff --git a/src/test/isolation2/output/resgroup/resgroup_memory_statistic.source b/src/test/isolation2/output/resgroup/resgroup_memory_statistic.source index e712194ef0d..51ead15a68b 100644 --- a/src/test/isolation2/output/resgroup/resgroup_memory_statistic.source +++ b/src/test/isolation2/output/resgroup/resgroup_memory_statistic.source @@ -30,11 +30,11 @@ CREATE CREATE OR REPLACE VIEW memory_result AS SELECT rsgname, ismaster, round_test(avg(memory_usage), 10) AS avg_mem FROM( SELECT rsgname, CASE (j->'key')::text WHEN '"-1"'::text THEN 1 ELSE 0 END AS ismaster, ((j->'value')->>'used')::int AS memory_usage FROM( SELECT rsgname, row_to_json(json_each(memory_usage::json)) AS j FROM gp_toolkit.gp_resgroup_status WHERE rsgname='rg1_memory_test' OR rsgname='rg2_memory_test' )a )b GROUP BY (rsgname, ismaster) ORDER BY rsgname, ismaster; CREATE -CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_rate_limit=10, memory_limit=30); +CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=30); CREATE CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; CREATE -CREATE RESOURCE GROUP rg2_memory_test WITH (concurrency=2, cpu_rate_limit=10, memory_limit=30); +CREATE RESOURCE GROUP rg2_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=30); CREATE CREATE ROLE role2_memory_test RESOURCE GROUP rg2_memory_test; CREATE diff --git a/src/test/isolation2/output/resgroup/resgroup_move_query.source b/src/test/isolation2/output/resgroup/resgroup_move_query.source index 
056ddc47f40..0b60169fed7 100644 --- a/src/test/isolation2/output/resgroup/resgroup_move_query.source +++ b/src/test/isolation2/output/resgroup/resgroup_move_query.source @@ -11,14 +11,6 @@ -- -- end_matchsubs -CREATE OR REPLACE FUNCTION resGroupPalloc(float) RETURNS int AS '@abs_builddir@/../regress/regress@DLSUFFIX@', 'resGroupPalloc' LANGUAGE C READS SQL DATA; -CREATE - -CREATE OR REPLACE FUNCTION hold_memory_by_percent(int, float) RETURNS int AS $$ SELECT * FROM resGroupPalloc($2) $$ LANGUAGE sql; -CREATE - -CREATE OR REPLACE FUNCTION hold_memory_by_percent_on_qe(int, float) RETURNS int AS $$ SELECT resGroupPalloc($2) FROM gp_dist_random('gp_id') $$ LANGUAGE sql; -CREATE -- check whether a query running in the specific group -- @param pid: the pid of QD -- @param groupname: resource group id @@ -43,7 +35,7 @@ DROP DROP RESOURCE GROUP rg_move_query; ERROR: resource group "rg_move_query" does not exist -- end_ignore -CREATE RESOURCE GROUP rg_move_query WITH (concurrency=1, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_move_query WITH (concurrency=1, cpu_hard_quota_limit=20); CREATE CREATE ROLE role_move_query RESOURCE GROUP rg_move_query; CREATE @@ -83,149 +75,7 @@ BEGIN 2: END; END --- test3: cannot move sessions that don't have enough memory on QD -CREATE RESOURCE GROUP rg_move_query_mem_small WITH (concurrency=1, cpu_rate_limit=20, memory_limit=10); -CREATE -CREATE ROLE role_move_query_mem_small RESOURCE GROUP rg_move_query_mem_small; -CREATE -1: SET ROLE role_move_query; -SET -1: BEGIN; -BEGIN -1: SELECT hold_memory_by_percent(1,1.0); - hold_memory_by_percent ------------------------- - 0 -(1 row) -SELECT pg_resgroup_move_query(pid, 'rg_move_query_mem_small') FROM pg_stat_activity WHERE query LIKE '%hold_memory_by_percent%' AND state = 'idle in transaction'; -ERROR: group 115315 doesn't have enough memory on master, expect:2956, available:1478 (resgroup.c:4582) -SELECT is_session_in_group(pid, 'rg_move_query_mem_small') FROM pg_stat_activity 
WHERE query LIKE '%hold_memory_by_percent%' AND state = 'idle in transaction'; - is_session_in_group ---------------------- - f -(1 row) -1: END; -END -1q: ... - --- test4: cannot move sessions that don't have enough memory on QE -1: SET ROLE role_move_query; -SET -1: BEGIN; -BEGIN -1: SELECT hold_memory_by_percent_on_qe(1,1.0); - hold_memory_by_percent_on_qe ------------------------------- - 0 -(1 row) -SELECT pg_resgroup_move_query(pid, 'rg_move_query_mem_small') FROM pg_stat_activity WHERE query LIKE '%hold_memory_by_percent_on_qe%' AND state = 'idle in transaction'; -ERROR: group 115315 doesn't have enough memory on segment, expect:2956, available:1478 (resgroup.c:4608) -SELECT is_session_in_group(pid, 'rg_move_query_mem_small') FROM pg_stat_activity WHERE query LIKE '%hold_memory_by_percent_on_qe%' AND state = 'idle in transaction'; - is_session_in_group ---------------------- - f -(1 row) -1: END; -END -1q: ... - --- test5: move query will wait if the destination group doesn't have slot -1: SET ROLE role_move_query; -SET -1: BEGIN; -BEGIN -1: SELECT hold_memory_by_percent_on_qe(1,0.1); - hold_memory_by_percent_on_qe ------------------------------- - 0 -(1 row) -2: SET ROLE role_move_query_mem_small; -SET -2: BEGIN; -BEGIN -3&: SELECT pg_resgroup_move_query(pid, 'rg_move_query_mem_small') FROM pg_stat_activity WHERE query LIKE '%hold_memory_by_percent_on_qe%' AND state = 'idle in transaction'; -2: END; -END -3<: <... completed> - pg_resgroup_move_query ------------------------- - t -(1 row) -3: SELECT is_session_in_group(pid, 'rg_move_query_mem_small') FROM pg_stat_activity WHERE query LIKE '%hold_memory_by_percent_on_qe%' AND state = 'idle in transaction'; - is_session_in_group ---------------------- - t -(1 row) -1: END; -END -1q: ... -2q: ... -3q: ... 
- --- test6: the destination group will wake up 'pg_resgroup_move_query' when a new slot become available -1: SET ROLE role_move_query; -SET -1&: SELECT pg_sleep(5); -2: SET ROLE role_move_query_mem_small; -SET -2&: SELECT pg_sleep(10); -3&: SELECT pg_resgroup_move_query(pid, 'rg_move_query') FROM pg_stat_activity WHERE query LIKE '%pg_sleep(10)%' AND rsgname='rg_move_query_mem_small'; -1<: <... completed> - pg_sleep ----------- - -(1 row) --- connection 1 finished, it will wake up connection 3 -3<: <... completed> - pg_resgroup_move_query ------------------------- - t -(1 row) -3: SELECT rsgname, query FROM pg_stat_activity WHERE state = 'active' and query like 'SELECT%'; - rsgname | query ----------------+---------------------------------------------------------------------------------------------- - rg_move_query | SELECT pg_sleep(10); - admin_group | SELECT rsgname, query FROM pg_stat_activity WHERE state = 'active' and query like 'SELECT%'; -(2 rows) -2<: <... completed> - pg_sleep ----------- - -(1 row) -1q: ... -2q: ... -3q: ... - --- test7: the destination group memory_limit is 0, meaning use the global shared memory -1: ALTER RESOURCE GROUP rg_move_query SET memory_limit 0; -ALTER -1: SET ROLE role_move_query_mem_small; -SET -1: BEGIN; -BEGIN -1: SELECT hold_memory_by_percent_on_qe(1,0.1); - hold_memory_by_percent_on_qe ------------------------------- - 0 -(1 row) -2: SELECT pg_resgroup_move_query(pid, 'rg_move_query') FROM pg_stat_activity WHERE query LIKE '%hold_memory_by_percent_on_qe%' AND rsgname='rg_move_query_mem_small'; - pg_resgroup_move_query ------------------------- - t -(1 row) -2: SELECT is_session_in_group(pid, 'rg_move_query') FROM pg_stat_activity WHERE query LIKE '%hold_memory_by_percent_on_qe%' AND state = 'idle in transaction'; - is_session_in_group ---------------------- - t -(1 row) -1q: ... -2q: ... 
- DROP ROLE role_move_query; DROP DROP RESOURCE GROUP rg_move_query; DROP -DROP ROLE role_move_query_mem_small; -DROP -DROP RESOURCE GROUP rg_move_query_mem_small; -DROP diff --git a/src/test/isolation2/sql/resgroup/resgroup_alter_concurrency.sql b/src/test/isolation2/sql/resgroup/resgroup_alter_concurrency.sql index 0a3c01e2afe..9bd71776317 100644 --- a/src/test/isolation2/sql/resgroup/resgroup_alter_concurrency.sql +++ b/src/test/isolation2/sql/resgroup/resgroup_alter_concurrency.sql @@ -3,8 +3,7 @@ DROP ROLE IF EXISTS role_concurrency_test; -- start_ignore DROP RESOURCE GROUP rg_concurrency_test; -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH -(concurrency=1, cpu_rate_limit=20, memory_limit=60, memory_shared_quota=0, memory_spill_ratio=10); +CREATE RESOURCE GROUP rg_concurrency_test WITH(concurrency=1, cpu_hard_quota_limit=20); CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; -- After a 'q' command the client connection is disconnected but the @@ -82,177 +81,8 @@ SELECT * FROM rg_activity_status; SELECT * FROM rg_activity_status; -- --- 3. increase both concurrency & memory_shared_quota after pending queries +-- 3. decrease concurrency -- - -ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 1; -ALTER RESOURCE GROUP rg_concurrency_test SET MEMORY_SHARED_QUOTA 60; - -11:SET ROLE role_concurrency_test; -11:BEGIN; - -21:SET ROLE role_concurrency_test; -22:SET ROLE role_concurrency_test; -21&:BEGIN; -22&:BEGIN; - -ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 2; - -SELECT * FROM rg_activity_status; - -ALTER RESOURCE GROUP rg_concurrency_test SET MEMORY_SHARED_QUOTA 20; - -21<: - -SELECT * FROM rg_activity_status; - -11:END; -11q: -22<: - -SELECT * FROM rg_activity_status; - -21:END; -22:END; -21q: -22q: - -SELECT * FROM rg_activity_status; - --- --- 4. 
increase both concurrency & memory_shared_quota before pending queries --- - -ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 1; -ALTER RESOURCE GROUP rg_concurrency_test SET MEMORY_SHARED_QUOTA 60; - -11:SET ROLE role_concurrency_test; -11:BEGIN; - -ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 2; -ALTER RESOURCE GROUP rg_concurrency_test SET MEMORY_SHARED_QUOTA 20; - -21:SET ROLE role_concurrency_test; -22:SET ROLE role_concurrency_test; -21:BEGIN; -22&:BEGIN; - -SELECT * FROM rg_activity_status; - -11:END; -11q: -22<: - -SELECT * FROM rg_activity_status; - -21:END; -22:END; -21q: -22q: - -SELECT * FROM rg_activity_status; - --- --- 5. increase both concurrency & memory_limit after pending queries --- - -ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 1; -ALTER RESOURCE GROUP rg_concurrency_test SET MEMORY_SHARED_QUOTA 0; -ALTER RESOURCE GROUP rg_concurrency_test SET MEMORY_LIMIT 30; - --- proc 11 gets a quota of 30/1=30 -11:SET ROLE role_concurrency_test; -11:BEGIN; - -21:SET ROLE role_concurrency_test; -22:SET ROLE role_concurrency_test; -21&:BEGIN; -22&:BEGIN; - -ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 2; --- now a new query needs a quota of 30/2=15 to run, --- there is no free quota at the moment, so 21 & 22 are still pending -SELECT * FROM rg_activity_status; - -ALTER RESOURCE GROUP rg_concurrency_test SET MEMORY_LIMIT 50; --- now a new query needs a quota of 50/2=25 to run, --- but there is only 50-30=20 free quota, so 21 & 22 are still pending -SELECT * FROM rg_activity_status; - -ALTER RESOURCE GROUP rg_concurrency_test SET MEMORY_LIMIT 60; --- now a new query needs a quota of 60/2=30 to run, --- and there is 60-30=30 free quota, so 21 gets executed and 22 is still pending - -21<: - -SELECT * FROM rg_activity_status; - -11:END; --- 11 releases its quota, so there is now 30 free quota, --- so 22 gets executed -11q: -22<: - -SELECT * FROM rg_activity_status; - -21:END; -22:END; -21q: -22q: - -SELECT * FROM 
rg_activity_status; - --- --- 6. increase both concurrency & memory_limit before pending queries --- - -ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 1; -ALTER RESOURCE GROUP rg_concurrency_test SET MEMORY_SHARED_QUOTA 0; -ALTER RESOURCE GROUP rg_concurrency_test SET MEMORY_LIMIT 30; - --- proc 11 gets a quota of 30/1=30 -11:SET ROLE role_concurrency_test; -11:BEGIN; - -ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 2; --- now a new query needs a quota of 30/2=15 to run, --- there is no free quota at the moment -SELECT * FROM rg_activity_status; - -ALTER RESOURCE GROUP rg_concurrency_test SET MEMORY_LIMIT 60; --- now a new query needs a quota of 60/2=30 to run, --- and there is 60-30=30 free quota, --- so one new query can get executed immediately - -21:SET ROLE role_concurrency_test; -22:SET ROLE role_concurrency_test; -21:BEGIN; --- proc 21 gets executed, there is no free quota now, --- so proc 22 is pending -22&:BEGIN; - -SELECT * FROM rg_activity_status; - -11:END; --- 11 releases its quota, so there is now 30 free quota, --- so 22 gets executed -11q: -22<: - -SELECT * FROM rg_activity_status; - -21:END; -22:END; -21q: -22q: - -SELECT * FROM rg_activity_status; - --- --- 7. decrease concurrency --- -ALTER RESOURCE GROUP rg_concurrency_test SET MEMORY_LIMIT 50; -ALTER RESOURCE GROUP rg_concurrency_test SET MEMORY_SHARED_QUOTA 0; ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 10; 11:SET ROLE role_concurrency_test; 11:BEGIN; @@ -283,12 +113,11 @@ SELECT pg_sleep(1); -- end_ignore -- --- 8. increase concurrency from 0 +-- 4. 
increase concurrency from 0 -- DROP ROLE role_concurrency_test; DROP RESOURCE GROUP rg_concurrency_test; -CREATE RESOURCE GROUP rg_concurrency_test WITH -(concurrency=0, cpu_rate_limit=20, memory_limit=60, memory_shared_quota=0, memory_spill_ratio=10); +CREATE RESOURCE GROUP rg_concurrency_test WITH(concurrency=0, cpu_hard_quota_limit=20); CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; 11:SET ROLE role_concurrency_test; @@ -304,7 +133,7 @@ SELECT * FROM rg_activity_status; 11q: -- --- 9.1 decrease concurrency to 0, +-- 5.1 decrease concurrency to 0, -- without running queries, -- without pending queries. -- @@ -316,7 +145,7 @@ ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 0; SELECT * FROM rg_activity_status; -- --- 9.2 decrease concurrency to 0, +-- 5.2 decrease concurrency to 0, -- with running queries, -- without pending queries. -- @@ -335,7 +164,7 @@ SELECT * FROM rg_activity_status; 11q: -- --- 9.3 decrease concurrency to 0, +-- 5.3 decrease concurrency to 0, -- with running queries, -- with pending queries. 
-- @@ -362,13 +191,13 @@ WHERE wait_event_type='ResourceGroup' AND rsgname='rg_concurrency_test'; 12q: SELECT * FROM rg_activity_status; --- 10: drop a resgroup with concurrency=0 and pending queries +-- 6: drop a resgroup with concurrency=0 and pending queries DROP ROLE IF EXISTS role_concurrency_test; -- start_ignore DROP RESOURCE GROUP rg_concurrency_test; -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=0, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=0, cpu_hard_quota_limit=20); CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; 61:SET ROLE role_concurrency_test; 61&:BEGIN; @@ -380,13 +209,13 @@ DROP RESOURCE GROUP rg_concurrency_test; 61:END; 61q: --- 11: drop a role with concurrency=0 and pending queries +-- 7: drop a role with concurrency=0 and pending queries DROP ROLE IF EXISTS role_concurrency_test; -- start_ignore DROP RESOURCE GROUP rg_concurrency_test; -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=0, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=0, cpu_hard_quota_limit=20); CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; 61:SET ROLE role_concurrency_test; 61&:BEGIN; diff --git a/src/test/isolation2/sql/resgroup/resgroup_alter_memory_spill_ratio.sql b/src/test/isolation2/sql/resgroup/resgroup_alter_memory_spill_ratio.sql deleted file mode 100644 index cd86cbccc80..00000000000 --- a/src/test/isolation2/sql/resgroup/resgroup_alter_memory_spill_ratio.sql +++ /dev/null @@ -1,39 +0,0 @@ --- start_ignore -DROP RESOURCE GROUP rg_spill_test; --- end_ignore -CREATE RESOURCE GROUP rg_spill_test WITH -(concurrency=10, cpu_rate_limit=20, memory_limit=20, memory_shared_quota=20, memory_spill_ratio=10); - -CREATE OR REPLACE VIEW rg_spill_status AS - SELECT groupname, memory_shared_quota, memory_spill_ratio - FROM gp_toolkit.gp_resgroup_config - WHERE 
groupname='rg_spill_test'; - --- ALTER MEMORY_SPILL_RATIO - -SELECT * FROM rg_spill_status; - --- positive -ALTER RESOURCE GROUP rg_spill_test SET MEMORY_SPILL_RATIO 20; -SELECT * FROM rg_spill_status; - --- positive, memory_spill_ratio range is [0, 100] -ALTER RESOURCE GROUP rg_spill_test SET MEMORY_SPILL_RATIO 0; -SELECT * FROM rg_spill_status; - --- positive: no limit on the sum of shared and spill -ALTER RESOURCE GROUP rg_spill_test SET MEMORY_SPILL_RATIO 81; -SELECT * FROM rg_spill_status; - --- negative: memory_spill_ratio is invalid -ALTER RESOURCE GROUP rg_spill_test SET MEMORY_SPILL_RATIO 20.0; -ALTER RESOURCE GROUP rg_spill_test SET MEMORY_SPILL_RATIO a; -SELECT * FROM rg_spill_status; - --- negative: memory_spill_ratio is larger than RESGROUP_MAX_MEMORY_SPILL_RATIO -ALTER RESOURCE GROUP rg_spill_test SET MEMORY_SPILL_RATIO 101; -SELECT * FROM rg_spill_status; - --- cleanup -DROP VIEW rg_spill_status; -DROP RESOURCE GROUP rg_spill_test; diff --git a/src/test/isolation2/sql/resgroup/resgroup_assign_slot_fail.sql b/src/test/isolation2/sql/resgroup/resgroup_assign_slot_fail.sql index bf25e6c2ab2..fd58556249c 100644 --- a/src/test/isolation2/sql/resgroup/resgroup_assign_slot_fail.sql +++ b/src/test/isolation2/sql/resgroup/resgroup_assign_slot_fail.sql @@ -5,7 +5,7 @@ DROP ROLE IF EXISTS role_test; -- start_ignore DROP RESOURCE GROUP rg_test; -- end_ignore -CREATE RESOURCE GROUP rg_test WITH (concurrency=2, cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP rg_test WITH (concurrency=2, cpu_hard_quota_limit=10); CREATE ROLE role_test RESOURCE GROUP rg_test; 1: SET ROLE role_test; diff --git a/src/test/isolation2/sql/resgroup/resgroup_bypass_memory_limit.sql b/src/test/isolation2/sql/resgroup/resgroup_bypass_memory_limit.sql deleted file mode 100644 index 15eac23c04e..00000000000 --- a/src/test/isolation2/sql/resgroup/resgroup_bypass_memory_limit.sql +++ /dev/null @@ -1,482 +0,0 @@ --- --- set timezone will accumulate the memory usage in session. 
--- here is used to test bypass memory limit is query level instead of session level. --- -SET TIMEZONE TO 'Japan'; -SET TIMEZONE TO 'Portugal'; -SET TIMEZONE TO 'Canada/Eastern'; -SET TIMEZONE TO 'Canada/Central'; -SET TIMEZONE TO 'Canada/Newfoundland'; -SET TIMEZONE TO 'Canada/Mountain'; -SET TIMEZONE TO 'Canada/Saskatchewan'; -SET TIMEZONE TO 'Canada/Yukon'; -SET TIMEZONE TO 'Canada/Pacific'; -SET TIMEZONE TO 'Canada/Atlantic'; -SET TIMEZONE TO 'CET'; -SET TIMEZONE TO 'ROK'; -SET TIMEZONE TO 'Arctic/Longyearbyen'; -SET TIMEZONE TO 'PRC'; -SET TIMEZONE TO 'GMT-0'; -SET TIMEZONE TO 'Iran'; -SET TIMEZONE TO 'GB-Eire'; -SET TIMEZONE TO 'Jamaica'; -SET TIMEZONE TO 'Europe/Tiraspol'; -SET TIMEZONE TO 'Europe/Vaduz'; -SET TIMEZONE TO 'Europe/Berlin'; -SET TIMEZONE TO 'Europe/San_Marino'; -SET TIMEZONE TO 'Europe/Guernsey'; -SET TIMEZONE TO 'Europe/Ljubljana'; -SET TIMEZONE TO 'Europe/Simferopol'; -SET TIMEZONE TO 'Europe/Belgrade'; -SET TIMEZONE TO 'Europe/Chisinau'; -SET TIMEZONE TO 'Europe/London'; -SET TIMEZONE TO 'Europe/Vatican'; -SET TIMEZONE TO 'Europe/Skopje'; -SET TIMEZONE TO 'Europe/Saratov'; -SET TIMEZONE TO 'Europe/Jersey'; -SET TIMEZONE TO 'Europe/Samara'; -SET TIMEZONE TO 'Europe/Oslo'; -SET TIMEZONE TO 'Europe/Helsinki'; -SET TIMEZONE TO 'Europe/Luxembourg'; -SET TIMEZONE TO 'Europe/Bucharest'; -SET TIMEZONE TO 'Europe/Podgorica'; -SET TIMEZONE TO 'Europe/Madrid'; -SET TIMEZONE TO 'Europe/Sarajevo'; -SET TIMEZONE TO 'Europe/Busingen'; -SET TIMEZONE TO 'Europe/Monaco'; -SET TIMEZONE TO 'Europe/Belfast'; -SET TIMEZONE TO 'Europe/Zagreb'; -SET TIMEZONE TO 'Europe/Warsaw'; -SET TIMEZONE TO 'Europe/Sofia'; -SET TIMEZONE TO 'Europe/Tallinn'; -SET TIMEZONE TO 'Europe/Brussels'; -SET TIMEZONE TO 'Europe/Isle_of_Man'; -SET TIMEZONE TO 'Europe/Stockholm'; -SET TIMEZONE TO 'Europe/Lisbon'; -SET TIMEZONE TO 'Europe/Istanbul'; -SET TIMEZONE TO 'Europe/Copenhagen'; -SET TIMEZONE TO 'Europe/Tirane'; -SET TIMEZONE TO 'Europe/Dublin'; -SET TIMEZONE TO 'Europe/Gibraltar'; 
-SET TIMEZONE TO 'Europe/Athens'; -SET TIMEZONE TO 'Europe/Zurich'; -SET TIMEZONE TO 'Europe/Vilnius'; -SET TIMEZONE TO 'Europe/Malta'; -SET TIMEZONE TO 'Europe/Riga'; -SET TIMEZONE TO 'Europe/Vienna'; -SET TIMEZONE TO 'Europe/Moscow'; -SET TIMEZONE TO 'Europe/Ulyanovsk'; -SET TIMEZONE TO 'Europe/Rome'; -SET TIMEZONE TO 'Europe/Kirov'; -SET TIMEZONE TO 'Europe/Uzhgorod'; -SET TIMEZONE TO 'Europe/Kaliningrad'; -SET TIMEZONE TO 'Europe/Mariehamn'; -SET TIMEZONE TO 'Europe/Budapest'; -SET TIMEZONE TO 'Europe/Astrakhan'; -SET TIMEZONE TO 'Europe/Volgograd'; -SET TIMEZONE TO 'Europe/Kiev'; -SET TIMEZONE TO 'Europe/Paris'; -SET TIMEZONE TO 'Europe/Andorra'; -SET TIMEZONE TO 'Europe/Amsterdam'; -SET TIMEZONE TO 'Europe/Nicosia'; -SET TIMEZONE TO 'Europe/Bratislava'; -SET TIMEZONE TO 'Europe/Minsk'; -SET TIMEZONE TO 'Europe/Prague'; -SET TIMEZONE TO 'Europe/Zaporozhye'; -SET TIMEZONE TO 'Egypt'; -SET TIMEZONE TO 'Zulu'; -SET TIMEZONE TO 'Etc/GMT+6'; -SET TIMEZONE TO 'Etc/GMT-8'; -SET TIMEZONE TO 'Etc/GMT-0'; -SET TIMEZONE TO 'Etc/GMT+3'; -SET TIMEZONE TO 'Etc/GMT-12'; -SET TIMEZONE TO 'Etc/GMT+7'; -SET TIMEZONE TO 'Etc/GMT-14'; -SET TIMEZONE TO 'Etc/GMT-10'; -SET TIMEZONE TO 'Etc/Zulu'; -SET TIMEZONE TO 'Etc/GMT-4'; -SET TIMEZONE TO 'Etc/GMT+5'; -SET TIMEZONE TO 'Etc/GMT'; -SET TIMEZONE TO 'Etc/GMT-11'; -SET TIMEZONE TO 'Etc/GMT-5'; -SET TIMEZONE TO 'Etc/GMT+1'; -SET TIMEZONE TO 'Etc/GMT+10'; -SET TIMEZONE TO 'Etc/UCT'; -SET TIMEZONE TO 'Etc/GMT+11'; -SET TIMEZONE TO 'Etc/GMT-3'; -SET TIMEZONE TO 'Etc/Greenwich'; -SET TIMEZONE TO 'Etc/GMT-13'; -SET TIMEZONE TO 'Etc/GMT-9'; -SET TIMEZONE TO 'Etc/GMT-7'; -SET TIMEZONE TO 'Etc/GMT-6'; -SET TIMEZONE TO 'Etc/GMT+4'; -SET TIMEZONE TO 'Etc/GMT+2'; -SET TIMEZONE TO 'Etc/Universal'; -SET TIMEZONE TO 'Etc/GMT+9'; -SET TIMEZONE TO 'Etc/GMT+0'; -SET TIMEZONE TO 'Etc/GMT-2'; -SET TIMEZONE TO 'Etc/GMT+12'; -SET TIMEZONE TO 'Etc/GMT-1'; -SET TIMEZONE TO 'Etc/UTC'; -SET TIMEZONE TO 'Etc/GMT+8'; -SET TIMEZONE TO 'Etc/GMT0'; -SET TIMEZONE 
TO 'Australia/Currie'; -SET TIMEZONE TO 'Australia/Lindeman'; -SET TIMEZONE TO 'Australia/Melbourne'; -SET TIMEZONE TO 'Australia/Yancowinna'; -SET TIMEZONE TO 'Australia/Eucla'; -SET TIMEZONE TO 'Australia/South'; -SET TIMEZONE TO 'Australia/Lord_Howe'; -SET TIMEZONE TO 'Australia/Perth'; -SET TIMEZONE TO 'Australia/Canberra'; -SET TIMEZONE TO 'Australia/Hobart'; -SET TIMEZONE TO 'Australia/North'; -SET TIMEZONE TO 'Australia/Broken_Hill'; -SET TIMEZONE TO 'Australia/NSW'; -SET TIMEZONE TO 'Australia/Victoria'; -SET TIMEZONE TO 'Australia/Adelaide'; -SET TIMEZONE TO 'Australia/Queensland'; -SET TIMEZONE TO 'Australia/Darwin'; -SET TIMEZONE TO 'Australia/West'; -SET TIMEZONE TO 'Australia/LHI'; -SET TIMEZONE TO 'Australia/ACT'; -SET TIMEZONE TO 'Australia/Sydney'; -SET TIMEZONE TO 'Australia/Brisbane'; -SET TIMEZONE TO 'Australia/Tasmania'; -SET TIMEZONE TO 'W-SU'; -SET TIMEZONE TO 'Africa/Kigali'; -SET TIMEZONE TO 'Africa/Dar_es_Salaam'; -SET TIMEZONE TO 'Africa/Niamey'; -SET TIMEZONE TO 'Africa/Brazzaville'; -SET TIMEZONE TO 'Africa/Addis_Ababa'; -SET TIMEZONE TO 'Africa/Kampala'; -SET TIMEZONE TO 'Africa/Kinshasa'; -SET TIMEZONE TO 'Africa/Lagos'; -SET TIMEZONE TO 'Africa/Accra'; -SET TIMEZONE TO 'Africa/Bangui'; -SET TIMEZONE TO 'Africa/Maputo'; -SET TIMEZONE TO 'Africa/Asmara'; -SET TIMEZONE TO 'Africa/Juba'; -SET TIMEZONE TO 'Africa/Tunis'; -SET TIMEZONE TO 'Africa/Bissau'; -SET TIMEZONE TO 'Africa/Freetown'; -SET TIMEZONE TO 'Africa/Tripoli'; -SET TIMEZONE TO 'Africa/Windhoek'; -SET TIMEZONE TO 'Africa/Casablanca'; -SET TIMEZONE TO 'Africa/Mbabane'; -SET TIMEZONE TO 'Africa/Harare'; -SET TIMEZONE TO 'Africa/Mogadishu'; -SET TIMEZONE TO 'Africa/Banjul'; -SET TIMEZONE TO 'Africa/Djibouti'; -SET TIMEZONE TO 'Africa/Malabo'; -SET TIMEZONE TO 'Africa/Nouakchott'; -SET TIMEZONE TO 'Africa/Lubumbashi'; -SET TIMEZONE TO 'Africa/Luanda'; -SET TIMEZONE TO 'Africa/Bamako'; -SET TIMEZONE TO 'Africa/Nairobi'; -SET TIMEZONE TO 'Africa/Lusaka'; -SET TIMEZONE TO 
'Africa/Ouagadougou'; -SET TIMEZONE TO 'Africa/Asmera'; -SET TIMEZONE TO 'Africa/Douala'; -SET TIMEZONE TO 'Africa/Dakar'; -SET TIMEZONE TO 'Africa/Khartoum'; -SET TIMEZONE TO 'Africa/Libreville'; -SET TIMEZONE TO 'Africa/Maseru'; -SET TIMEZONE TO 'Africa/Lome'; -SET TIMEZONE TO 'Africa/Abidjan'; -SET TIMEZONE TO 'Africa/Ceuta'; -SET TIMEZONE TO 'Africa/El_Aaiun'; -SET TIMEZONE TO 'Africa/Algiers'; -SET TIMEZONE TO 'Africa/Ndjamena'; -SET TIMEZONE TO 'Africa/Gaborone'; -SET TIMEZONE TO 'Africa/Blantyre'; -SET TIMEZONE TO 'Africa/Sao_Tome'; -SET TIMEZONE TO 'Africa/Monrovia'; -SET TIMEZONE TO 'Africa/Johannesburg'; -SET TIMEZONE TO 'Africa/Timbuktu'; -SET TIMEZONE TO 'Africa/Cairo'; -SET TIMEZONE TO 'Africa/Porto-Novo'; -SET TIMEZONE TO 'Africa/Bujumbura'; -SET TIMEZONE TO 'Africa/Conakry'; -SET TIMEZONE TO 'PST8PDT'; -SET TIMEZONE TO 'Indian/Maldives'; -SET TIMEZONE TO 'Indian/Mahe'; -SET TIMEZONE TO 'Indian/Christmas'; -SET TIMEZONE TO 'Indian/Mauritius'; -SET TIMEZONE TO 'Indian/Chagos'; -SET TIMEZONE TO 'Indian/Mayotte'; -SET TIMEZONE TO 'Indian/Reunion'; -SET TIMEZONE TO 'Indian/Antananarivo'; -SET TIMEZONE TO 'Indian/Kerguelen'; -SET TIMEZONE TO 'Indian/Cocos'; -SET TIMEZONE TO 'Indian/Comoro'; -SET TIMEZONE TO 'MET'; -SET TIMEZONE TO 'ROC'; -SET TIMEZONE TO 'EET'; -SET TIMEZONE TO 'GMT'; -SET TIMEZONE TO 'EST'; -SET TIMEZONE TO 'Hongkong'; -SET TIMEZONE TO 'Turkey'; -SET TIMEZONE TO 'Iceland'; -SET TIMEZONE TO 'Poland'; -SET TIMEZONE TO 'GB'; -SET TIMEZONE TO 'Israel'; -SET TIMEZONE TO 'UCT'; -SET TIMEZONE TO 'Navajo'; -SET TIMEZONE TO 'Greenwich'; -SET TIMEZONE TO 'Antarctica/Davis'; -SET TIMEZONE TO 'Antarctica/Casey'; -SET TIMEZONE TO 'Antarctica/Troll'; -SET TIMEZONE TO 'Antarctica/DumontDUrville'; -SET TIMEZONE TO 'Antarctica/South_Pole'; -SET TIMEZONE TO 'Antarctica/Vostok'; -SET TIMEZONE TO 'Antarctica/Syowa'; -SET TIMEZONE TO 'Antarctica/Mawson'; -SET TIMEZONE TO 'Antarctica/McMurdo'; -SET TIMEZONE TO 'Antarctica/Palmer'; -SET TIMEZONE TO 
'Antarctica/Macquarie'; -SET TIMEZONE TO 'Antarctica/Rothera'; -SET TIMEZONE TO 'CST6CDT'; -SET TIMEZONE TO 'Libya'; -SET TIMEZONE TO 'Kwajalein'; -SET TIMEZONE TO 'Cuba'; -SET TIMEZONE TO 'Mexico/BajaNorte'; -SET TIMEZONE TO 'Mexico/General'; -SET TIMEZONE TO 'Mexico/BajaSur'; -SET TIMEZONE TO 'WET'; -SET TIMEZONE TO 'Singapore'; -SET TIMEZONE TO 'Brazil/DeNoronha'; -SET TIMEZONE TO 'Brazil/Acre'; -SET TIMEZONE TO 'Brazil/West'; -SET TIMEZONE TO 'Brazil/East'; -SET TIMEZONE TO 'MST7MDT'; -SET TIMEZONE TO 'US/Aleutian'; -SET TIMEZONE TO 'US/Samoa'; -SET TIMEZONE TO 'US/Eastern'; -SET TIMEZONE TO 'US/Alaska'; -SET TIMEZONE TO 'US/Indiana-Starke'; -SET TIMEZONE TO 'US/Michigan'; -SET TIMEZONE TO 'US/Central'; -SET TIMEZONE TO 'US/East-Indiana'; -SET TIMEZONE TO 'US/Mountain'; -SET TIMEZONE TO 'US/Hawaii'; -SET TIMEZONE TO 'US/Pacific'; -SET TIMEZONE TO 'US/Arizona'; -SET TIMEZONE TO 'Universal'; -SET TIMEZONE TO 'Asia/Dushanbe'; -SET TIMEZONE TO 'Asia/Oral'; -SET TIMEZONE TO 'Asia/Magadan'; -SET TIMEZONE TO 'Asia/Hong_Kong'; -SET TIMEZONE TO 'Asia/Aden'; -SET TIMEZONE TO 'Asia/Tomsk'; -SET TIMEZONE TO 'Asia/Aqtobe'; -SET TIMEZONE TO 'Asia/Pontianak'; -SET TIMEZONE TO 'Asia/Kuching'; -SET TIMEZONE TO 'Asia/Tbilisi'; -SET TIMEZONE TO 'Asia/Novosibirsk'; -SET TIMEZONE TO 'Asia/Chita'; -SET TIMEZONE TO 'Asia/Hebron'; -SET TIMEZONE TO 'Asia/Choibalsan'; -SET TIMEZONE TO 'Asia/Qyzylorda'; -SET TIMEZONE TO 'Asia/Jakarta'; -SET TIMEZONE TO 'Asia/Colombo'; -SET TIMEZONE TO 'Asia/Dili'; -SET TIMEZONE TO 'Asia/Thimphu'; -SET TIMEZONE TO 'Asia/Tashkent'; -SET TIMEZONE TO 'Asia/Ujung_Pandang'; -SET TIMEZONE TO 'Asia/Ulaanbaatar'; -SET TIMEZONE TO 'Asia/Jerusalem'; -SET TIMEZONE TO 'Asia/Pyongyang'; -SET TIMEZONE TO 'Asia/Vladivostok'; -SET TIMEZONE TO 'Asia/Samarkand'; -SET TIMEZONE TO 'Asia/Beirut'; -SET TIMEZONE TO 'Asia/Shanghai'; -SET TIMEZONE TO 'Asia/Kabul'; -SET TIMEZONE TO 'Asia/Bangkok'; -SET TIMEZONE TO 'Asia/Almaty'; -SET TIMEZONE TO 'Asia/Kathmandu'; -SET TIMEZONE TO 
'Asia/Ust-Nera'; -SET TIMEZONE TO 'Asia/Yangon'; -SET TIMEZONE TO 'Asia/Novokuznetsk'; -SET TIMEZONE TO 'Asia/Qatar'; -SET TIMEZONE TO 'Asia/Baghdad'; -SET TIMEZONE TO 'Asia/Srednekolymsk'; -SET TIMEZONE TO 'Asia/Hovd'; -SET TIMEZONE TO 'Asia/Istanbul'; -SET TIMEZONE TO 'Asia/Omsk'; -SET TIMEZONE TO 'Asia/Macau'; -SET TIMEZONE TO 'Asia/Yekaterinburg'; -SET TIMEZONE TO 'Asia/Vientiane'; -SET TIMEZONE TO 'Asia/Famagusta'; -SET TIMEZONE TO 'Asia/Urumqi'; -SET TIMEZONE TO 'Asia/Kuwait'; -SET TIMEZONE TO 'Asia/Dhaka'; -SET TIMEZONE TO 'Asia/Ulan_Bator'; -SET TIMEZONE TO 'Asia/Dubai'; -SET TIMEZONE TO 'Asia/Saigon'; -SET TIMEZONE TO 'Asia/Muscat'; -SET TIMEZONE TO 'Asia/Tehran'; -SET TIMEZONE TO 'Asia/Ho_Chi_Minh'; -SET TIMEZONE TO 'Asia/Aqtau'; -SET TIMEZONE TO 'Asia/Bishkek'; -SET TIMEZONE TO 'Asia/Kashgar'; -SET TIMEZONE TO 'Asia/Gaza'; -SET TIMEZONE TO 'Asia/Riyadh'; -SET TIMEZONE TO 'Asia/Ashkhabad'; -SET TIMEZONE TO 'Asia/Khandyga'; -SET TIMEZONE TO 'Asia/Anadyr'; -SET TIMEZONE TO 'Asia/Brunei'; -SET TIMEZONE TO 'Asia/Phnom_Penh'; -SET TIMEZONE TO 'Asia/Baku'; -SET TIMEZONE TO 'Asia/Sakhalin'; -SET TIMEZONE TO 'Asia/Atyrau'; -SET TIMEZONE TO 'Asia/Ashgabat'; -SET TIMEZONE TO 'Asia/Thimbu'; -SET TIMEZONE TO 'Asia/Damascus'; -SET TIMEZONE TO 'Asia/Kolkata'; -SET TIMEZONE TO 'Asia/Jayapura'; -SET TIMEZONE TO 'Asia/Tokyo'; -SET TIMEZONE TO 'Asia/Katmandu'; -SET TIMEZONE TO 'Asia/Bahrain'; -SET TIMEZONE TO 'Asia/Tel_Aviv'; -SET TIMEZONE TO 'Asia/Singapore'; -SET TIMEZONE TO 'Asia/Krasnoyarsk'; -SET TIMEZONE TO 'Asia/Seoul'; -SET TIMEZONE TO 'Asia/Barnaul'; -SET TIMEZONE TO 'Asia/Yakutsk'; -SET TIMEZONE TO 'Asia/Irkutsk'; -SET TIMEZONE TO 'Asia/Macao'; -SET TIMEZONE TO 'Asia/Taipei'; -SET TIMEZONE TO 'Asia/Kamchatka'; -SET TIMEZONE TO 'Asia/Yerevan'; -SET TIMEZONE TO 'Asia/Harbin'; -SET TIMEZONE TO 'Asia/Manila'; -SET TIMEZONE TO 'Asia/Qostanay'; -SET TIMEZONE TO 'Asia/Amman'; -SET TIMEZONE TO 'Asia/Nicosia'; -SET TIMEZONE TO 'Asia/Karachi'; -SET TIMEZONE TO 
'Asia/Rangoon'; -SET TIMEZONE TO 'Asia/Chungking'; -SET TIMEZONE TO 'Asia/Chongqing'; -SET TIMEZONE TO 'Asia/Makassar'; -SET TIMEZONE TO 'Asia/Dacca'; -SET TIMEZONE TO 'Asia/Kuala_Lumpur'; -SET TIMEZONE TO 'Asia/Calcutta'; -SET TIMEZONE TO 'EST5EDT'; -SET TIMEZONE TO 'GMT+0'; -SET TIMEZONE TO 'Pacific/Wake'; -SET TIMEZONE TO 'Pacific/Samoa'; -SET TIMEZONE TO 'Pacific/Efate'; -SET TIMEZONE TO 'Pacific/Niue'; -SET TIMEZONE TO 'Pacific/Pago_Pago'; -SET TIMEZONE TO 'Pacific/Pitcairn'; -SET TIMEZONE TO 'Pacific/Saipan'; -SET TIMEZONE TO 'Pacific/Norfolk'; -SET TIMEZONE TO 'Pacific/Yap'; -SET TIMEZONE TO 'Pacific/Enderbury'; -SET TIMEZONE TO 'Pacific/Port_Moresby'; -SET TIMEZONE TO 'Pacific/Funafuti'; -SET TIMEZONE TO 'Pacific/Apia'; -SET TIMEZONE TO 'Pacific/Rarotonga'; -SET TIMEZONE TO 'Pacific/Ponape'; -SET TIMEZONE TO 'Pacific/Wallis'; -SET TIMEZONE TO 'Pacific/Johnston'; -SET TIMEZONE TO 'Pacific/Guam'; -SET TIMEZONE TO 'Pacific/Guadalcanal'; -SET TIMEZONE TO 'Pacific/Chatham'; -SET TIMEZONE TO 'Pacific/Truk'; -SET TIMEZONE TO 'Pacific/Fakaofo'; -SET TIMEZONE TO 'Pacific/Kosrae'; -SET TIMEZONE TO 'Pacific/Kiritimati'; -SET TIMEZONE TO 'Pacific/Gambier'; -SET TIMEZONE TO 'Pacific/Kwajalein'; -SET TIMEZONE TO 'Pacific/Midway'; -SET TIMEZONE TO 'Pacific/Pohnpei'; -SET TIMEZONE TO 'Pacific/Majuro'; -SET TIMEZONE TO 'Pacific/Tahiti'; -SET TIMEZONE TO 'Pacific/Fiji'; -SET TIMEZONE TO 'Pacific/Tongatapu'; -SET TIMEZONE TO 'Pacific/Palau'; -SET TIMEZONE TO 'Pacific/Galapagos'; -SET TIMEZONE TO 'Pacific/Marquesas'; -SET TIMEZONE TO 'Pacific/Bougainville'; -SET TIMEZONE TO 'Pacific/Honolulu'; -SET TIMEZONE TO 'Pacific/Noumea'; -SET TIMEZONE TO 'Pacific/Auckland'; -SET TIMEZONE TO 'Pacific/Chuuk'; -SET TIMEZONE TO 'Pacific/Nauru'; -SET TIMEZONE TO 'Pacific/Easter'; -SET TIMEZONE TO 'Pacific/Tarawa'; -SET TIMEZONE TO 'America/Detroit'; -SET TIMEZONE TO 'America/Barbados'; -SET TIMEZONE TO 'America/North_Dakota/New_Salem'; -SET TIMEZONE TO 'America/North_Dakota/Center'; -SET 
TIMEZONE TO 'America/North_Dakota/Beulah'; -SET TIMEZONE TO 'America/Thunder_Bay'; -SET TIMEZONE TO 'America/Panama'; -SET TIMEZONE TO 'America/Cancun'; -SET TIMEZONE TO 'America/Santo_Domingo'; -SET TIMEZONE TO 'America/Matamoros'; -SET TIMEZONE TO 'America/Port-au-Prince'; -SET TIMEZONE TO 'America/Atikokan'; -SET TIMEZONE TO 'America/Knox_IN'; -SET TIMEZONE TO 'America/Cayenne'; -SET TIMEZONE TO 'America/Kralendijk'; -SET TIMEZONE TO 'America/Iqaluit'; -SET TIMEZONE TO 'America/Paramaribo'; -SET TIMEZONE TO 'America/Aruba'; -SET TIMEZONE TO 'America/Vancouver'; -SET TIMEZONE TO 'America/Noronha'; -SET TIMEZONE TO 'America/Ojinaga'; -SET TIMEZONE TO 'America/Atka'; -SET TIMEZONE TO 'America/St_Johns'; -SET TIMEZONE TO 'America/Mexico_City'; -SET TIMEZONE TO 'America/Rosario'; -SET TIMEZONE TO 'America/Nipigon'; -SET TIMEZONE TO 'America/Costa_Rica'; -SET TIMEZONE TO 'America/Regina'; -SET TIMEZONE TO 'America/La_Paz'; -SET TIMEZONE TO 'America/Jamaica'; -SET TIMEZONE TO 'America/Anchorage'; -SET TIMEZONE TO 'America/St_Kitts'; -SET TIMEZONE TO 'America/Godthab'; -SET TIMEZONE TO 'America/Swift_Current'; -SET TIMEZONE TO 'America/Danmarkshavn'; -SET TIMEZONE TO 'America/Phoenix'; -SET TIMEZONE TO 'America/Lower_Princes'; -SET TIMEZONE TO 'America/Yakutat'; -SET TIMEZONE TO 'America/Menominee'; -SET TIMEZONE TO 'America/Bahia'; -SET TIMEZONE TO 'America/Montserrat'; -SET TIMEZONE TO 'America/Miquelon'; -SET TIMEZONE TO 'America/Anguilla'; -SET TIMEZONE TO 'America/Grand_Turk'; -SET TIMEZONE TO 'America/Coral_Harbour'; -SET TIMEZONE TO 'America/Glace_Bay'; -SET TIMEZONE TO 'America/Boa_Vista'; -SET TIMEZONE TO 'America/Dominica'; -SET TIMEZONE TO 'America/Goose_Bay'; -SET TIMEZONE TO 'America/Caracas'; -SET TIMEZONE TO 'America/Boise'; -SET TIMEZONE TO 'America/Cayman'; -SET TIMEZONE TO 'America/Puerto_Rico'; -SET TIMEZONE TO 'America/Guyana'; -SET TIMEZONE TO 'America/Metlakatla'; -SET TIMEZONE TO 'America/Cuiaba'; -SET TIMEZONE TO 'America/Virgin'; -SET TIMEZONE 
TO 'America/Campo_Grande'; -SET TIMEZONE TO 'America/Maceio'; -SET TIMEZONE TO 'America/Scoresbysund'; -SET TIMEZONE TO 'America/Guadeloupe'; -SET TIMEZONE TO 'America/Indiana/Vevay'; -SET TIMEZONE TO 'America/Indiana/Winamac'; -SET TIMEZONE TO 'America/Indiana/Petersburg'; -SET TIMEZONE TO 'America/Indiana/Marengo'; -SET TIMEZONE TO 'America/Indiana/Knox'; -SET TIMEZONE TO 'America/Indiana/Indianapolis'; -SET TIMEZONE TO 'America/Indiana/Vincennes'; -SET TIMEZONE TO 'America/Indiana/Tell_City'; -SET TIMEZONE TO 'America/Manaus'; -SET TIMEZONE TO 'America/Resolute'; -SET TIMEZONE TO 'America/Pangnirtung'; -SET TIMEZONE TO 'America/Winnipeg'; diff --git a/src/test/isolation2/sql/resgroup/resgroup_cancel_terminate_concurrency.sql b/src/test/isolation2/sql/resgroup/resgroup_cancel_terminate_concurrency.sql index 90eeb44cd70..18ade26a758 100644 --- a/src/test/isolation2/sql/resgroup/resgroup_cancel_terminate_concurrency.sql +++ b/src/test/isolation2/sql/resgroup/resgroup_cancel_terminate_concurrency.sql @@ -9,7 +9,7 @@ CREATE OR REPLACE VIEW rg_concurrency_view AS FROM pg_stat_activity WHERE rsgname='rg_concurrency_test'; -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_hard_quota_limit=20); CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; 1:SET ROLE role_concurrency_test; 1:BEGIN; @@ -35,7 +35,7 @@ DROP ROLE IF EXISTS role_concurrency_test; DROP RESOURCE GROUP rg_concurrency_test; -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_hard_quota_limit=20); CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; 1:SET ROLE role_concurrency_test; 1:BEGIN; @@ -61,7 +61,7 @@ DROP ROLE IF EXISTS role_concurrency_test; DROP RESOURCE GROUP rg_concurrency_test; -- end_ignore -CREATE RESOURCE GROUP 
rg_concurrency_test WITH (concurrency=2, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=2, cpu_hard_quota_limit=20); CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; 1:SET ROLE role_concurrency_test; 1&:SELECT pg_sleep(10000); @@ -91,7 +91,7 @@ DROP ROLE IF EXISTS role_concurrency_test; DROP RESOURCE GROUP rg_concurrency_test; -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=2, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=2, cpu_hard_quota_limit=20); CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; 1:SET ROLE role_concurrency_test; 1&:SELECT pg_sleep(10000); @@ -121,7 +121,7 @@ DROP ROLE IF EXISTS role_concurrency_test; DROP RESOURCE GROUP rg_concurrency_test; -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_hard_quota_limit=20); CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; 1:SET ROLE role_concurrency_test; 1:CREATE TEMP TABLE tmp(a INT); diff --git a/src/test/isolation2/sql/resgroup/resgroup_concurrency.sql b/src/test/isolation2/sql/resgroup/resgroup_concurrency.sql index 1459bcafbef..42807f9a1ab 100644 --- a/src/test/isolation2/sql/resgroup/resgroup_concurrency.sql +++ b/src/test/isolation2/sql/resgroup/resgroup_concurrency.sql @@ -4,12 +4,15 @@ DROP ROLE IF EXISTS role_concurrency_test; -- start_ignore DROP RESOURCE GROUP rg_concurrency_test; -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=2, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=2, cpu_hard_quota_limit=20); CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; -- no query has been assigned to the this group -SELECT r.rsgname, num_running, num_queueing, num_queued, num_executed FROM 
gp_toolkit.gp_resgroup_status s, pg_resgroup r WHERE s.groupid=r.oid AND r.rsgname='rg_concurrency_test'; +SELECT r.rsgname, num_running, num_queueing, num_queued, num_executed +FROM gp_toolkit.gp_resgroup_status s, pg_resgroup r +WHERE s.groupid=r.oid AND r.rsgname='rg_concurrency_test'; + 2:SET ROLE role_concurrency_test; 2:BEGIN; 3:SET ROLE role_concurrency_test; @@ -18,8 +21,12 @@ SELECT r.rsgname, num_running, num_queueing, num_queued, num_executed FROM gp_to 4&:BEGIN; -- new transaction will be blocked when the concurrency limit of the resource group is reached. -SELECT r.rsgname, num_running, num_queueing, num_queued, num_executed FROM gp_toolkit.gp_resgroup_status s, pg_resgroup r WHERE s.groupid=r.oid AND r.rsgname='rg_concurrency_test'; -SELECT wait_event from pg_stat_activity where query = 'BEGIN;' and state = 'active' and rsgname = 'rg_concurrency_test' and wait_event_type='ResourceGroup'; +SELECT r.rsgname, num_running, num_queueing, num_queued, num_executed +FROM gp_toolkit.gp_resgroup_status s, pg_resgroup r +WHERE s.groupid=r.oid AND r.rsgname='rg_concurrency_test'; + +SELECT wait_event from pg_stat_activity +where query = 'BEGIN;' and state = 'active' and rsgname = 'rg_concurrency_test' and wait_event_type='ResourceGroup'; 2:END; 3:END; 4<: @@ -27,18 +34,20 @@ SELECT wait_event from pg_stat_activity where query = 'BEGIN;' and state = 'acti 2q: 3q: 4q: -SELECT r.rsgname, num_running, num_queueing, num_queued, num_executed FROM gp_toolkit.gp_resgroup_status s, pg_resgroup r WHERE s.groupid=r.oid AND r.rsgname='rg_concurrency_test'; +SELECT r.rsgname, num_running, num_queueing, num_queued, num_executed +FROM gp_toolkit.gp_resgroup_status s, pg_resgroup r WHERE s.groupid=r.oid AND r.rsgname='rg_concurrency_test'; DROP ROLE role_concurrency_test; DROP RESOURCE GROUP rg_concurrency_test; -- test2: test alter concurrency -- Create a resource group with concurrency=2. Prepare 2 running transactions and 1 queueing transactions. 
--- Alter concurrency 2->3, the queueing transaction will be woken up, the 'value' of pg_resgroupcapability will be set to 3. +-- Alter concurrency 2->3, the queueing transaction will be woken up, the 'value' of pg_resgroupcapability +-- will be set to 3. DROP ROLE IF EXISTS role_concurrency_test; -- start_ignore DROP RESOURCE GROUP rg_concurrency_test; -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=2, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=2, cpu_hard_quota_limit=20); CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; 12:SET ROLE role_concurrency_test; 12:BEGIN; @@ -46,10 +55,13 @@ CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; 13:BEGIN; 14:SET ROLE role_concurrency_test; 14&:BEGIN; -SELECT r.rsgname, num_running, num_queueing, num_queued, num_executed FROM gp_toolkit.gp_resgroup_status s, pg_resgroup r WHERE s.groupid=r.oid AND r.rsgname='rg_concurrency_test'; +SELECT r.rsgname, num_running, num_queueing, num_queued, num_executed +FROM gp_toolkit.gp_resgroup_status s, pg_resgroup r WHERE s.groupid=r.oid AND r.rsgname='rg_concurrency_test'; + SELECT concurrency FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_concurrency_test'; ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 3; -SELECT r.rsgname, num_running, num_queueing, num_queued, num_executed FROM gp_toolkit.gp_resgroup_status s, pg_resgroup r WHERE s.groupid=r.oid AND r.rsgname='rg_concurrency_test'; +SELECT r.rsgname, num_running, num_queueing, num_queued, num_executed +FROM gp_toolkit.gp_resgroup_status s, pg_resgroup r WHERE s.groupid=r.oid AND r.rsgname='rg_concurrency_test'; SELECT concurrency FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_concurrency_test'; 12:END; 13:END; @@ -67,7 +79,7 @@ DROP ROLE IF EXISTS role_concurrency_test; -- start_ignore DROP RESOURCE GROUP rg_concurrency_test; -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH 
(concurrency=3, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=3, cpu_hard_quota_limit=20); CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; 22:SET ROLE role_concurrency_test; 22:BEGIN; @@ -77,26 +89,31 @@ CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; 24:BEGIN; 25:SET ROLE role_concurrency_test; 25&:BEGIN; -SELECT r.rsgname, num_running, num_queueing, num_queued, num_executed FROM gp_toolkit.gp_resgroup_status s, pg_resgroup r WHERE s.groupid=r.oid AND r.rsgname='rg_concurrency_test'; +SELECT r.rsgname, num_running, num_queueing, num_queued, num_executed +FROM gp_toolkit.gp_resgroup_status s, pg_resgroup r WHERE s.groupid=r.oid AND r.rsgname='rg_concurrency_test'; SELECT concurrency FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_concurrency_test'; -- Alter concurrency 3->2, the 'value' of pg_resgroupcapability will be set to 2. ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 2; SELECT concurrency FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_concurrency_test'; -- When one transaction is finished, queueing transaction won't be woken up. There're 2 running transactions and 1 queueing transaction. 24:END; -SELECT r.rsgname, num_running, num_queueing, num_queued, num_executed FROM gp_toolkit.gp_resgroup_status s, pg_resgroup r WHERE s.groupid=r.oid AND r.rsgname='rg_concurrency_test'; +SELECT r.rsgname, num_running, num_queueing, num_queued, num_executed +FROM gp_toolkit.gp_resgroup_status s, pg_resgroup r WHERE s.groupid=r.oid AND r.rsgname='rg_concurrency_test'; -- New transaction will be queued, there're 2 running and 2 queueing transactions. 
24&:BEGIN; -SELECT r.rsgname, num_running, num_queueing, num_queued, num_executed FROM gp_toolkit.gp_resgroup_status s, pg_resgroup r WHERE s.groupid=r.oid AND r.rsgname='rg_concurrency_test'; +SELECT r.rsgname, num_running, num_queueing, num_queued, num_executed +FROM gp_toolkit.gp_resgroup_status s, pg_resgroup r WHERE s.groupid=r.oid AND r.rsgname='rg_concurrency_test'; -- Finish another transaction, one queueing transaction will be woken up, there're 2 running transactions and 1 queueing transaction. 22:END; -SELECT r.rsgname, num_running, num_queueing, num_queued, num_executed FROM gp_toolkit.gp_resgroup_status s, pg_resgroup r WHERE s.groupid=r.oid AND r.rsgname='rg_concurrency_test'; +SELECT r.rsgname, num_running, num_queueing, num_queued, num_executed +FROM gp_toolkit.gp_resgroup_status s, pg_resgroup r WHERE s.groupid=r.oid AND r.rsgname='rg_concurrency_test'; -- Alter concurrency 2->2, the 'value' of pg_resgroupcapability will be set to 2. ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 2; SELECT concurrency FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_concurrency_test'; -- Finish another transaction, one queueing transaction will be woken up, there're 2 running transactions and 0 queueing transaction. 
23:END; -SELECT r.rsgname, num_running, num_queueing, num_queued, num_executed FROM gp_toolkit.gp_resgroup_status s, pg_resgroup r WHERE s.groupid=r.oid AND r.rsgname='rg_concurrency_test'; +SELECT r.rsgname, num_running, num_queueing, num_queued, num_executed +FROM gp_toolkit.gp_resgroup_status s, pg_resgroup r WHERE s.groupid=r.oid AND r.rsgname='rg_concurrency_test'; 24<: 25<: 25:END; @@ -114,7 +131,7 @@ DROP ROLE IF EXISTS role_concurrency_test; -- start_ignore DROP RESOURCE GROUP rg_concurrency_test; -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=2, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=2, cpu_hard_quota_limit=20); CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; -- DROP should fail if there're running transactions @@ -133,7 +150,7 @@ DROP ROLE IF EXISTS role_concurrency_test; DROP RESOURCE GROUP rg_concurrency_test; -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_hard_quota_limit=20); CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; 51:SET ROLE role_concurrency_test; 51:BEGIN; @@ -155,7 +172,7 @@ DROP ROLE IF EXISTS role_concurrency_test; DROP RESOURCE GROUP rg_concurrency_test; -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_hard_quota_limit=20); CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; 51:SET ROLE role_concurrency_test; 51:BEGIN; @@ -174,7 +191,7 @@ DROP ROLE IF EXISTS role_concurrency_test; DROP RESOURCE GROUP rg_concurrency_test; -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_hard_quota_limit=20); 
CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; 61:SET ROLE role_concurrency_test; 61:BEGIN; @@ -193,7 +210,7 @@ DROP ROLE IF EXISTS role_concurrency_test; DROP RESOURCE GROUP rg_concurrency_test; -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=0, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=0, cpu_hard_quota_limit=20); CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; 61:SET ROLE role_concurrency_test; 61&:BEGIN; @@ -207,7 +224,7 @@ DROP RESOURCE GROUP rg_concurrency_test; -- Test cursors, pl/* functions only take one slot. -- -- set concurrency to 1 -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_hard_quota_limit=20); CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; -- declare cursors and verify that it only takes one resource group slot diff --git a/src/test/isolation2/sql/resgroup/resgroup_cpuset_empty_default.sql b/src/test/isolation2/sql/resgroup/resgroup_cpuset_empty_default.sql index fb2811f3865..2731486c424 100644 --- a/src/test/isolation2/sql/resgroup/resgroup_cpuset_empty_default.sql +++ b/src/test/isolation2/sql/resgroup/resgroup_cpuset_empty_default.sql @@ -18,7 +18,7 @@ DROP RESOURCE GROUP rg1_cpuset_test; -- Create a resource group with all the cpu cores. -- The isolation2 test framework does not support \set so we have to plan with -- some tricks. -! psql -d isolation2resgrouptest -Ac "CREATE RESOURCE GROUP rg1_cpuset_test WITH (memory_limit=10, cpuset='0-$(($(nproc)-1))')"; +! psql -d isolation2resgrouptest -Ac "CREATE RESOURCE GROUP rg1_cpuset_test WITH (cpuset='0-$(($(nproc)-1))')"; -- Alter a resource group from / to all the cpu cores should also work. 
ALTER RESOURCE GROUP rg1_cpuset_test SET cpuset '0'; diff --git a/src/test/isolation2/sql/resgroup/resgroup_dumpinfo.sql b/src/test/isolation2/sql/resgroup/resgroup_dumpinfo.sql index c6b4c490e9a..ae6282d7a89 100644 --- a/src/test/isolation2/sql/resgroup/resgroup_dumpinfo.sql +++ b/src/test/isolation2/sql/resgroup/resgroup_dumpinfo.sql @@ -17,8 +17,7 @@ def validate(json_obj, segnum): return False qd_info = [j for j in array if j["segid"] == -1][0] #validate keys - keys = ["segid", "segmentsOnMaster", "loaded", "totalChunks", - "freeChunks", "chunkSizeInBits", "groups"] + keys = ["segid", "segmentsOnMaster", "loaded", "groups"] for key in keys: if key not in qd_info: return False @@ -29,9 +28,7 @@ def validate(json_obj, segnum): return False group = groups[0] #validate group keys - keys = ["group_id", "nRunning", "locked_for_drop", "memExpected", - "memQuotaGranted", "memSharedGranted", "memQuotaUsed", - "memUsage", "memSharedUsage"] + keys = ["group_id", "nRunning", "locked_for_drop"] for key in keys: if key not in group: return False @@ -60,7 +57,7 @@ return validate(json_obj, n) $$ LANGUAGE plpython3u; -CREATE RESOURCE GROUP rg_dumpinfo_test WITH (concurrency=2, cpu_rate_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_dumpinfo_test WITH (concurrency=2, cpu_hard_quota_limit=20); CREATE ROLE role_dumpinfo_test RESOURCE GROUP rg_dumpinfo_test; 2:SET ROLE role_dumpinfo_test; diff --git a/src/test/isolation2/sql/resgroup/resgroup_functions.sql b/src/test/isolation2/sql/resgroup/resgroup_functions.sql index e9d08e6c54f..385012353f2 100644 --- a/src/test/isolation2/sql/resgroup/resgroup_functions.sql +++ b/src/test/isolation2/sql/resgroup/resgroup_functions.sql @@ -1,11 +1,11 @@ -- start_ignore SELECT s.groupid, s.num_running, s.num_queueing, s.num_queued, s.num_executed -FROM pg_resgroup_get_status(NULL::oid) s(groupid, num_running, num_queueing, num_queued, num_executed, total_queue_duration, cpu_usage, memory_usage); +FROM pg_resgroup_get_status(NULL::oid) 
s(groupid, num_running, num_queueing, num_queued, num_executed, total_queue_duration, cpu_usage); -- end_ignore CREATE TEMP TABLE resgroup_function_test(LIKE gp_toolkit.gp_resgroup_status); INSERT INTO resgroup_function_test(groupid, num_running, num_queueing, num_queued, num_executed) SELECT s.groupid, s.num_running, s.num_queueing, s.num_queued, s.num_executed -FROM pg_resgroup_get_status(NULL::oid) s(groupid, num_running, num_queueing, num_queued, num_executed, total_queue_duration, cpu_usage, memory_usage) LIMIT 1; +FROM pg_resgroup_get_status(NULL::oid) s(groupid, num_running, num_queueing, num_queued, num_executed, total_queue_duration, cpu_usage) LIMIT 1; SELECT count(num_executed)>0 FROM resgroup_function_test WHERE num_executed IS NOT NULL; diff --git a/src/test/isolation2/sql/resgroup/resgroup_large_group_id.sql b/src/test/isolation2/sql/resgroup/resgroup_large_group_id.sql index 10ec72fa8b6..461e8eaff50 100644 --- a/src/test/isolation2/sql/resgroup/resgroup_large_group_id.sql +++ b/src/test/isolation2/sql/resgroup/resgroup_large_group_id.sql @@ -1,7 +1,7 @@ -- Test resgroup oid larger than int32. 
select gp_inject_fault('bump_oid', 'skip', dbid) from gp_segment_configuration where role = 'p' and content = -1; -create resource group rg_large_oid with (cpu_rate_limit=20, memory_limit=10); +create resource group rg_large_oid with (cpu_hard_quota_limit=20); select gp_inject_fault('bump_oid', 'reset', dbid) from gp_segment_configuration where role = 'p' and content = -1; diff --git a/src/test/isolation2/sql/resgroup/resgroup_memory_hashagg_spill.sql b/src/test/isolation2/sql/resgroup/resgroup_memory_hashagg_spill.sql deleted file mode 100644 index 77bb99e56b7..00000000000 --- a/src/test/isolation2/sql/resgroup/resgroup_memory_hashagg_spill.sql +++ /dev/null @@ -1,127 +0,0 @@ --- start_matchsubs --- m/INSERT \d+/ --- s/INSERT \d+/INSERT/ --- end_matchsubs -create schema hashagg_spill; -set search_path to hashagg_spill; - --- start_ignore -create language plpython3u; --- end_ignore - --- set workfile is created to true if all segment did it. -create or replace function hashagg_spill.is_workfile_created(explain_query text) -returns setof int as -$$ -import re -query = "select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0;" -rv = plpy.execute(query) -nsegments = int(rv[0]['nsegments']) -rv = plpy.execute(explain_query) -search_text = 'Work_mem used' -result = [] -for i in range(len(rv)): - cur_line = rv[i]['QUERY PLAN'] - if search_text.lower() in cur_line.lower(): - p = re.compile('.+\((seg[\d]+).+ Workfile: \(([\d+]) spilling\)') - m = p.match(cur_line) - if not m: - continue - workfile_created = int(m.group(2)) - cur_row = int(workfile_created == nsegments) - result.append(cur_row) -return result -$$ -language plpython3u; - -create table testhagg (i1 int, i2 int, i3 int, i4 int); -insert into testhagg select i,i,i,i from - (select generate_series(1, nsegments * 17000) as i from - (select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0) foo) bar; - - --- start_ignore -DROP ROLE IF EXISTS 
role1_memory_test; -DROP RESOURCE GROUP rg1_memory_test; --- end_ignore -CREATE ROLE role1_memory_test SUPERUSER; -CREATE RESOURCE GROUP rg1_memory_test WITH -(concurrency=2, cpu_rate_limit=10, memory_limit=30, memory_shared_quota=0, memory_spill_ratio=10); -SET ROLE TO role1_memory_test; - -0: ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SPILL_RATIO 2; -0: ALTER ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; -set gp_resgroup_print_operator_memory_limits=on; - --- the number of rows returned by the query varies depending on the number of segments, so --- only print the first 10 -select * from (select max(i1) from testhagg group by i2) foo order by 1 limit 10; -select * from hashagg_spill.is_workfile_created('explain analyze select max(i1) from testhagg group by i2;'); -select * from hashagg_spill.is_workfile_created('explain analyze select max(i1) from testhagg group by i2 limit 45000;'); - - --- Test HashAgg with increasing amount of overflows - -reset all; - --- Returns the number of overflows from EXPLAIN ANALYZE output -create or replace function hashagg_spill.num_hashagg_overflows(explain_query text) -returns setof int as -$$ -import re -query = "select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0;" -rv = plpy.execute(query) -rv = plpy.execute(explain_query) -result = [] -for i in range(len(rv)): - cur_line = rv[i]['QUERY PLAN'] - p = re.compile('.+\((seg[\d]+).+ ([\d+]) overflows;') - m = p.match(cur_line) - if m: - overflows = int(m.group(2)) - result.append(overflows) -return result -$$ -language plpython3u; - --- Test agg spilling scenarios -drop table if exists aggspill; -create table aggspill (i int, j int, t text) distributed by (i); -insert into aggspill select i, i*2, i::text from generate_series(1, 10000) i; -insert into aggspill select i, i*2, i::text from generate_series(1, 100000) i; -insert into aggspill select i, i*2, i::text from generate_series(1, 1000000) i; - --- No spill with large statement 
memory -0: ALTER ROLE role1_memory_test RESOURCE GROUP none; -0: DROP RESOURCE GROUP rg1_memory_test; -0: CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=1, cpu_rate_limit=10, memory_limit=60, memory_shared_quota=0, memory_spill_ratio=30); -0: ALTER ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; -select count(*) from (select i, count(*) from aggspill group by i,j having count(*) = 1) g; - --- Reduce the statement memory to induce spilling -0: ALTER ROLE role1_memory_test RESOURCE GROUP none; -0: DROP RESOURCE GROUP rg1_memory_test; -0: CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_rate_limit=10, memory_limit=30, memory_shared_quota=0, memory_spill_ratio=10); -0: ALTER ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; -select overflows >= 1 from hashagg_spill.num_hashagg_overflows('explain analyze -select count(*) from (select i, count(*) from aggspill group by i,j having count(*) = 2) g') overflows; -select count(*) from (select i, count(*) from aggspill group by i,j having count(*) = 2) g; - --- Reduce the statement memory, nbatches and entrysize even further to cause multiple overflows -set gp_hashagg_default_nbatches = 4; -0: ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SPILL_RATIO 5; -0: ALTER ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; - -select overflows > 1 from hashagg_spill.num_hashagg_overflows('explain analyze -select count(*) from (select i, count(*) from aggspill group by i,j,t having count(*) = 3) g') overflows; - -select count(*) from (select i, count(*) from aggspill group by i,j,t having count(*) = 3) g; - -drop schema hashagg_spill cascade; -drop table aggspill; - --- start_ignore -RESET ROLE; -DROP ROLE IF EXISTS role1_memory_test; -DROP RESOURCE GROUP rg1_memory_test; --- end_ignore diff --git a/src/test/isolation2/sql/resgroup/resgroup_memory_hashjoin_spill.sql b/src/test/isolation2/sql/resgroup/resgroup_memory_hashjoin_spill.sql deleted file mode 100644 index edfb33ce080..00000000000 --- 
a/src/test/isolation2/sql/resgroup/resgroup_memory_hashjoin_spill.sql +++ /dev/null @@ -1,72 +0,0 @@ --- start_matchsubs --- m/INSERT \d+/ --- s/INSERT \d+/INSERT/ --- end_matchsubs -create schema hashjoin_spill; -set search_path to hashjoin_spill; - --- start_ignore -create language plpython3u; --- end_ignore - --- set workfile is created to true if all segment did it. -create or replace function hashjoin_spill.is_workfile_created(explain_query text) -returns setof int as -$$ -import re -query = "select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0;" -rv = plpy.execute(query) -nsegments = int(rv[0]['nsegments']) -rv = plpy.execute(explain_query) -search_text = 'Work_mem used' -result = [] -for i in range(len(rv)): - cur_line = rv[i]['QUERY PLAN'] - if search_text.lower() in cur_line.lower(): - p = re.compile('.+\((seg[\d]+).+ Workfile: \(([\d+]) spilling\)') - m = p.match(cur_line) - workfile_created = int(m.group(2)) - cur_row = int(workfile_created == nsegments) - result.append(cur_row) -return result -$$ -language plpython3u; - --- start_ignore -DROP ROLE IF EXISTS role1_memory_test; -DROP RESOURCE GROUP rg1_memory_test; --- end_ignore -CREATE RESOURCE GROUP rg1_memory_test WITH -(concurrency=2, cpu_rate_limit=10, memory_limit=30, memory_shared_quota=0, memory_spill_ratio=1); -CREATE ROLE role1_memory_test SUPERUSER RESOURCE GROUP rg1_memory_test; -SET ROLE TO role1_memory_test; - -CREATE TABLE test_hj_spill (i1 int, i2 int, i3 int, i4 int, i5 int, i6 int, i7 int, i8 int); -insert into test_hj_spill SELECT i,i,i%1000,i,i,i,i,i from - (select generate_series(1, nsegments * 15000) as i from - (select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0) foo) bar; -set gp_resgroup_print_operator_memory_limits=on; - -set gp_workfile_type_hashjoin=buffile; -select avg(i3) from (SELECT t1.* FROM test_hj_spill AS t1 RIGHT JOIN test_hj_spill AS t2 ON t1.i1=t2.i2) foo; -select * from 
hashjoin_spill.is_workfile_created('explain analyze SELECT t1.* FROM test_hj_spill AS t1 RIGHT JOIN test_hj_spill AS t2 ON t1.i1=t2.i2;'); -select * from hashjoin_spill.is_workfile_created('explain analyze SELECT t1.* FROM test_hj_spill AS t1 RIGHT JOIN test_hj_spill AS t2 ON t1.i1=t2.i2 LIMIT 15000;'); - -set gp_workfile_type_hashjoin=bfz; -set gp_workfile_compress_algorithm=zlib; -select avg(i3) from (SELECT t1.* FROM test_hj_spill AS t1 RIGHT JOIN test_hj_spill AS t2 ON t1.i1=t2.i2) foo; -select * from hashjoin_spill.is_workfile_created('explain analyze SELECT t1.* FROM test_hj_spill AS t1 RIGHT JOIN test_hj_spill AS t2 ON t1.i1=t2.i2'); -select * from hashjoin_spill.is_workfile_created('explain analyze SELECT t1.* FROM test_hj_spill AS t1 RIGHT JOIN test_hj_spill AS t2 ON t1.i1=t2.i2 LIMIT 15000;'); - -set gp_workfile_compress_algorithm=NONE; -select avg(i3) from (SELECT t1.* FROM test_hj_spill AS t1 RIGHT JOIN test_hj_spill AS t2 ON t1.i1=t2.i2) foo; -select * from hashjoin_spill.is_workfile_created('explain analyze SELECT t1.* FROM test_hj_spill AS t1 RIGHT JOIN test_hj_spill AS t2 ON t1.i1=t2.i2'); -select * from hashjoin_spill.is_workfile_created('explain analyze SELECT t1.* FROM test_hj_spill AS t1 RIGHT JOIN test_hj_spill AS t2 ON t1.i1=t2.i2 LIMIT 15000;'); - -drop schema hashjoin_spill cascade; - --- start_ignore -RESET ROLE; -DROP ROLE IF EXISTS role1_memory_test; -DROP RESOURCE GROUP rg1_memory_test; --- end_ignore diff --git a/src/test/isolation2/sql/resgroup/resgroup_memory_materialize_spill.sql b/src/test/isolation2/sql/resgroup/resgroup_memory_materialize_spill.sql deleted file mode 100644 index 90c1a8b28aa..00000000000 --- a/src/test/isolation2/sql/resgroup/resgroup_memory_materialize_spill.sql +++ /dev/null @@ -1,104 +0,0 @@ --- start_matchsubs --- m/INSERT \d+/ --- s/INSERT \d+/INSERT/ --- end_matchsubs -create schema materialize_spill; -set search_path to materialize_spill; - --- start_ignore -create language plpython3u; --- end_ignore - --- 
Helper function to verify that a plan spilled to disk. For each node --- in the plan that used Workfiles (Materialize or Sort nodes, currently), --- return the number of segments where the node spilled to disk. -create or replace function num_workfiles_created(explain_query text) -returns setof int as -$$ -import re -rv = plpy.execute(explain_query) -search_text = 'Work_mem used' -result = [] -for i in range(len(rv)): - cur_line = rv[i]['QUERY PLAN'] - if search_text.lower() in cur_line.lower(): - p = re.compile('.+\((seg[\d]+).+ Workfile: \(([\d+]) spilling\)') - m = p.match(cur_line) - workfile_created = int(m.group(2)) - result.append(workfile_created) -return result -$$ -language plpython3u; - --- Run a query that contains a Materialize node that spills to disk. --- --- The expected plan is something like this: --- --- Gather Motion 3:1 --- -> Nested Loop Left Join --- Join Filter: t1.i1 = t2.i2 --- -> Seq Scan on test_mat_small t1 --- -> Materialize --- -> Redistribute Motion 3:3 --- Hash Key: t2.i2 --- -> Seq Scan on test_mat_large t2 --- --- The planner will put a Materialize node on the inner side, to shield --- the Motion node from rewinding. Because the larger table doesn't fit --- in memory, the Materialize will spill to disk. --- -CREATE TABLE test_mat_small (i1 int); -INSERT INTO test_mat_small SELECT i from generate_series(101, 105) i; - --- Scale the larger table's size with the number of segments, so that there is enough --- data on every segment to cause spilling. 
-CREATE TABLE test_mat_large (i1 int, i2 int, i3 int, i4 int, i5 int, i6 int, i7 int, i8 int); -INSERT INTO test_mat_large SELECT i,i,i,i,i,i,i,i from - (select generate_series(1, nsegments * 50000) as i from - (select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0) foo) bar; - --- start_ignore -DROP ROLE IF EXISTS role1_memory_test; -DROP RESOURCE GROUP rg1_memory_test; --- end_ignore -CREATE RESOURCE GROUP rg1_memory_test WITH -(concurrency=2, cpu_rate_limit=10, memory_limit=30, memory_shared_quota=0, memory_spill_ratio=1); -CREATE ROLE role1_memory_test SUPERUSER RESOURCE GROUP rg1_memory_test; -SET ROLE TO role1_memory_test; - -set gp_resgroup_print_operator_memory_limits=on; -set enable_hashjoin = false; -set enable_nestloop = true; --- ORCA doesn't honor enable_nestloop/enable_hashjoin, so this won't produce --- the kind of plan we're looking for. -set optimizer=off; - --- This is the actual test query. -select * FROM test_mat_small as t1 left outer join test_mat_large AS t2 on t1.i1=t2.i2; - --- Check that the Materialize node spilled to disk, to make sure we're testing spilling --- as intended. The inner side of the join with the Materialize will not get executed on --- segments that have no data for the outer side. Therefore, we expect the Materialize --- node to only be executed, and spilled, on as many segments as there nodes that hold --- data from test_mat_small. -select n - (select count (distinct gp_segment_id) from test_mat_small) as difference -from num_workfiles_created($$ - explain analyze - select * FROM test_mat_small as t1 left outer join test_mat_large AS t2 on t1.i1=t2.i2 -$$) as n; - --- Repeat, with a LIMIT. This causes the underlying scan to finish earlier. 
-select * FROM test_mat_small as t1 left outer join test_mat_large AS t2 on t1.i1=t2.i2 limit 10; -select n - (select count (distinct gp_segment_id) from test_mat_small) as difference -from num_workfiles_created($$ - explain analyze - select * FROM test_mat_small as t1 left outer join test_mat_large AS t2 on t1.i1=t2.i2 limit 10 -$$) as n; - -drop schema materialize_spill cascade; - --- start_ignore -RESET ROLE; -DROP ROLE IF EXISTS role1_memory_test; -DROP RESOURCE GROUP rg1_memory_test; --- end_ignore diff --git a/src/test/isolation2/sql/resgroup/resgroup_memory_sisc_mat_sort.sql b/src/test/isolation2/sql/resgroup/resgroup_memory_sisc_mat_sort.sql deleted file mode 100644 index 35ff681a3d3..00000000000 --- a/src/test/isolation2/sql/resgroup/resgroup_memory_sisc_mat_sort.sql +++ /dev/null @@ -1,101 +0,0 @@ --- start_matchsubs --- m/INSERT \d+/ --- s/INSERT \d+/INSERT/ --- end_matchsubs -create schema sisc_mat_sort; -set search_path to sisc_mat_sort; - --- start_ignore -create language plpython3u; --- end_ignore - --- set workfile is created to true if all segment did it. 
-create or replace function sisc_mat_sort.is_workfile_created(explain_query text) -returns setof int as -$$ -import re -query = "select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0;" -rv = plpy.execute(query) -nsegments = int(rv[0]['nsegments']) -rv = plpy.execute(explain_query) -search_text = 'Work_mem used' -result = [] -for i in range(len(rv)): - cur_line = rv[i]['QUERY PLAN'] - if search_text.lower() in cur_line.lower(): - p = re.compile('.+\((seg[\d]+).+ Workfile: \(([\d+]) spilling\)') - m = p.match(cur_line) - workfile_created = int(m.group(2)) - cur_row = int(workfile_created == nsegments) - result.append(cur_row) -return result -$$ -language plpython3u; - -create table testsiscm (i1 int, i2 int, i3 int, i4 int); -insert into testsiscm select i, i % 1000, i % 100000, i % 75 from - (select generate_series(1, nsegments * 150000) as i from - (select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0) foo) bar; - --- start_ignore -DROP ROLE IF EXISTS role1_memory_test; -DROP RESOURCE GROUP rg1_memory_test; --- end_ignore -CREATE RESOURCE GROUP rg1_memory_test WITH -(concurrency=2, cpu_rate_limit=10, memory_limit=30, memory_shared_quota=0, memory_spill_ratio=3); -CREATE ROLE role1_memory_test SUPERUSER RESOURCE GROUP rg1_memory_test; -SET ROLE TO role1_memory_test; - - -set gp_resgroup_print_operator_memory_limits=on; -set gp_cte_sharing=on; -set gp_enable_mk_sort=on; --- The expected output is very sensitive to the kind of plan this produces. 
--- We're testing the executor, not the planner, so force ORCA off, to get --- the particular plan -set optimizer=off; - -select count(*) from (with ctesisc as - (select count(i1) as c1, i2 as c2, i3 as c3 from testsiscm group by i2, i3) -select * -from ctesisc as t1, ctesisc as t2 -where t1.c1 = t2.c1 and t1.c3 = t2.c3) foo; -select * from sisc_mat_sort.is_workfile_created('explain analyze -with ctesisc as - (select count(i1) as c1, i2 as c2, i3 as c3 from testsiscm group by i2, i3) -select * -from ctesisc as t1, ctesisc as t2 -where t1.c1 = t2.c1 and t1.c3 = t2.c3;'); -select * from sisc_mat_sort.is_workfile_created('explain analyze -with ctesisc as - (select count(i1) as c1, i2 as c2, i3 as c3 from testsiscm group by i2, i3) -select * -from ctesisc as t1, ctesisc as t2 -where t1.c1 = t2.c1 and t1.c3 = t2.c3 limit 50000;'); - -set gp_enable_mk_sort=off; -select count(*) from (with ctesisc as - (select count(i1) as c1, i2 as c2, i3 as c3 from testsiscm group by i2, i3) -select * -from ctesisc as t1, ctesisc as t2 -where t1.c1 = t2.c1 and t1.c3 = t2.c3) foo; -select * from sisc_mat_sort.is_workfile_created('explain analyze -with ctesisc as - (select count(i1) as c1, i2 as c2, i3 as c3 from testsiscm group by i2, i3) -select * -from ctesisc as t1, ctesisc as t2 -where t1.c1 = t2.c1 and t1.c3 = t2.c3;'); -select * from sisc_mat_sort.is_workfile_created('explain analyze -with ctesisc as - (select count(i1) as c1, i2 as c2, i3 as c3 from testsiscm group by i2, i3) -select * -from ctesisc as t1, ctesisc as t2 -where t1.c1 = t2.c1 and t1.c3 = t2.c3 limit 50000;'); - -drop schema sisc_mat_sort cascade; - --- start_ignore -RESET ROLE; -DROP ROLE IF EXISTS role1_memory_test; -DROP RESOURCE GROUP rg1_memory_test; --- end_ignore diff --git a/src/test/isolation2/sql/resgroup/resgroup_memory_sisc_sort_spill.sql b/src/test/isolation2/sql/resgroup/resgroup_memory_sisc_sort_spill.sql deleted file mode 100644 index 673a11c4ca9..00000000000 --- 
a/src/test/isolation2/sql/resgroup/resgroup_memory_sisc_sort_spill.sql +++ /dev/null @@ -1,105 +0,0 @@ --- start_matchsubs --- m/INSERT \d+/ --- s/INSERT \d+/INSERT/ --- end_matchsubs -create schema sisc_sort_spill; -set search_path to sisc_sort_spill; - --- start_ignore -create language plpython3u; --- end_ignore - --- set workfile is created to true if all segment did it. -create or replace function sisc_sort_spill.is_workfile_created(explain_query text) -returns setof int as -$$ -import re -query = "select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0;" -rv = plpy.execute(query) -nsegments = int(rv[0]['nsegments']) -rv = plpy.execute(explain_query) -search_text = 'Work_mem used' -result = [] -for i in range(len(rv)): - cur_line = rv[i]['QUERY PLAN'] - if search_text.lower() in cur_line.lower(): - p = re.compile('.+\((seg[\d]+).+ Workfile: \(([\d+]) spilling\)') - m = p.match(cur_line) - workfile_created = int(m.group(2)) - cur_row = int(workfile_created == nsegments) - result.append(cur_row) -return result -$$ -language plpython3u; - -create table testsisc (i1 int, i2 int, i3 int, i4 int); -insert into testsisc select i, i % 1000, i % 100000, i % 75 from - (select generate_series(1, nsegments * 50000) as i from - (select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0) foo) bar; - --- start_ignore -DROP ROLE IF EXISTS role1_memory_test; -DROP RESOURCE GROUP rg1_memory_test; --- end_ignore -CREATE RESOURCE GROUP rg1_memory_test WITH -(concurrency=2, cpu_rate_limit=10, memory_limit=30, memory_shared_quota=0, memory_spill_ratio=2); -CREATE ROLE role1_memory_test SUPERUSER RESOURCE GROUP rg1_memory_test; -SET ROLE TO role1_memory_test; - -set gp_resgroup_print_operator_memory_limits=on; -set gp_cte_sharing=on; --- ORCA optimizes away the ORDER BY in our test query, and therefore doesn't exercise --- a Sort that spills. 
-set optimizer=off; - -set gp_enable_mk_sort=on; -select avg(i3) from ( - with ctesisc as (select * from testsisc order by i2) - select t1.i3, t2.i2 - from ctesisc as t1, ctesisc as t2 - where t1.i1 = t2.i2 -) foo; - -select * from sisc_sort_spill.is_workfile_created('explain analyze - with ctesisc as (select * from testsisc order by i2) - select t1.i3, t2.i2 - from ctesisc as t1, ctesisc as t2 - where t1.i1 = t2.i2 -;'); -select * from sisc_sort_spill.is_workfile_created('explain analyze - with ctesisc as (select * from testsisc order by i2) - select t1.i3, t2.i2 - from ctesisc as t1, ctesisc as t2 - where t1.i1 = t2.i2 -limit 50000;'); - - -set gp_enable_mk_sort=off; -select avg(i3) from ( - with ctesisc as (select * from testsisc order by i2) - select t1.i3, t2.i2 - from ctesisc as t1, ctesisc as t2 - where t1.i1 = t2.i2 -) foo; - -select * from sisc_sort_spill.is_workfile_created('explain analyze - with ctesisc as (select * from testsisc order by i2) - select t1.i3, t2.i2 - from ctesisc as t1, ctesisc as t2 - where t1.i1 = t2.i2 -;'); - -select * from sisc_sort_spill.is_workfile_created('explain analyze - with ctesisc as (select * from testsisc order by i2) - select t1.i3, t2.i2 - from ctesisc as t1, ctesisc as t2 - where t1.i1 = t2.i2 -limit 50000;'); - -drop schema sisc_sort_spill cascade; - --- start_ignore -RESET ROLE; -DROP ROLE IF EXISTS role1_memory_test; -DROP RESOURCE GROUP rg1_memory_test; --- end_ignore diff --git a/src/test/isolation2/sql/resgroup/resgroup_memory_sort_spill.sql b/src/test/isolation2/sql/resgroup/resgroup_memory_sort_spill.sql deleted file mode 100644 index bf36a9d7a7b..00000000000 --- a/src/test/isolation2/sql/resgroup/resgroup_memory_sort_spill.sql +++ /dev/null @@ -1,68 +0,0 @@ --- start_matchsubs --- m/INSERT \d+/ --- s/INSERT \d+/INSERT/ --- end_matchsubs -create schema sort_spill; -set search_path to sort_spill; - --- start_ignore -create language plpython3u; --- end_ignore - --- set workfile is created to true if all segment 
did it. -create or replace function sort_spill.is_workfile_created(explain_query text) -returns setof int as -$$ -import re -query = "select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0;" -rv = plpy.execute(query) -nsegments = int(rv[0]['nsegments']) -rv = plpy.execute(explain_query) -search_text = 'Work_mem used' -result = [] -for i in range(len(rv)): - cur_line = rv[i]['QUERY PLAN'] - if search_text.lower() in cur_line.lower(): - p = re.compile('.+\((seg[\d]+).+ Workfile: \(([\d+]) spilling\)') - m = p.match(cur_line) - workfile_created = int(m.group(2)) - cur_row = int(workfile_created == nsegments) - result.append(cur_row) -return result -$$ -language plpython3u; - - -create table testsort (i1 int, i2 int, i3 int, i4 int); -insert into testsort select i, i % 1000, i % 100000, i % 75 from - (select generate_series(1, nsegments * 50000) as i from - (select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0) foo) bar; - --- start_ignore -DROP ROLE IF EXISTS role1_memory_test; -DROP RESOURCE GROUP rg1_memory_test; --- end_ignore -CREATE RESOURCE GROUP rg1_memory_test WITH -(concurrency=2, cpu_rate_limit=10, memory_limit=30, memory_shared_quota=0, memory_spill_ratio=1); -CREATE ROLE role1_memory_test SUPERUSER RESOURCE GROUP rg1_memory_test; -SET ROLE TO role1_memory_test; - -set gp_resgroup_print_operator_memory_limits=on; - -set gp_enable_mk_sort=on; -select avg(i2) from (select i1,i2 from testsort order by i2) foo; -select * from sort_spill.is_workfile_created('explain analyze select i1,i2 from testsort order by i2;'); -select * from sort_spill.is_workfile_created('explain analyze select i1,i2 from testsort order by i2 limit 50000;'); - -set gp_enable_mk_sort=off; -select avg(i2) from (select i1,i2 from testsort order by i2) foo; -select * from sort_spill.is_workfile_created('explain analyze select i1,i2 from testsort order by i2;'); -select * from sort_spill.is_workfile_created('explain 
analyze select i1,i2 from testsort order by i2 limit 50000;'); - -drop schema sort_spill cascade; - --- start_ignore -RESET ROLE; -DROP ROLE IF EXISTS role1_memory_test; -DROP RESOURCE GROUP rg1_memory_test; --- end_ignore diff --git a/src/test/isolation2/sql/resgroup/resgroup_memory_spilltodisk.sql b/src/test/isolation2/sql/resgroup/resgroup_memory_spilltodisk.sql deleted file mode 100644 index eba39b45037..00000000000 --- a/src/test/isolation2/sql/resgroup/resgroup_memory_spilltodisk.sql +++ /dev/null @@ -1,70 +0,0 @@ --- up the admin_group memory limits -ALTER RESOURCE GROUP admin_group SET memory_limit 30; - --- Test Mark/Restore in Material Node -create table spilltest1 (a integer); -create table spilltest2 (a integer); -insert into spilltest1 select a from generate_series(1,400000) a; -insert into spilltest2 select a from generate_series(1,400000) a; - --- go back to the default admin_group limit -ALTER RESOURCE GROUP admin_group SET memory_limit 10; - --- start_ignore -DROP ROLE IF EXISTS role1_memory_test; -DROP RESOURCE GROUP rg1_memory_test; --- end_ignore -CREATE RESOURCE GROUP rg1_memory_test WITH -(concurrency=2, cpu_rate_limit=10, memory_limit=60, memory_shared_quota=0, memory_spill_ratio=10); -CREATE ROLE role1_memory_test SUPERUSER RESOURCE GROUP rg1_memory_test; -SET ROLE TO role1_memory_test; - -set enable_hashagg=off; -set enable_mergejoin=on; -set enable_hashjoin=off; -set enable_nestloop=off; - -create temporary table spilltestresult1 as -select t1.a as t1a, t2.a as t2a -from (select a from spilltest1 group by a) t1, - (select a from spilltest2 group by a) t2 -where t1.a = t2.a; - --- check that the result looks sane -select count(*), sum(t1a), sum(t2a), sum(t1a - t2a) from spilltestresult1; - --- Test Hash Aggregation when the work mem is too small for the hash table -create table spilltest (a integer, b integer); -insert into spilltest select a, a%25 from generate_series(1,8000) a; -analyze; -set enable_hashagg=on; -set enable_groupagg=off; - 
-select b,count(*) from spilltest group by b order by b; - -select b,count(*) from spilltest group by b order by b; --- Test Hash Join when the work mem is too small for the hash table -drop table if exists spilltest; -create table spilltest (a integer, b integer); -insert into spilltest select a, a%25 from generate_series(1,800000) a; -analyze; -- We have to do an analyze to force a hash join -set enable_mergejoin=off; -set enable_nestloop=off; -set enable_hashjoin=on; - -create temporary table spilltestresult2 as -select t1.a as t1a, t1.b as t1b, t2.a as t2a, t2.b as t2b from spilltest t1, spilltest t2 where t1.a = t2.a; --- check that the result looks sane -select count(*), sum(t1a), sum(t2a), sum(t2a), sum(t2b), sum(t1a * t1b) from spilltestresult2; - -drop table spilltest1; -drop table spilltest2; -drop table spilltest; -drop table spilltestresult1; -drop table spilltestresult2; - --- start_ignore -RESET ROLE; -DROP ROLE IF EXISTS role1_memory_test; -DROP RESOURCE GROUP rg1_memory_test; --- end_ignore diff --git a/src/test/isolation2/sql/resgroup/resgroup_name_convention.sql b/src/test/isolation2/sql/resgroup/resgroup_name_convention.sql index 4a1a87633b9..7898f946950 100644 --- a/src/test/isolation2/sql/resgroup/resgroup_name_convention.sql +++ b/src/test/isolation2/sql/resgroup/resgroup_name_convention.sql @@ -34,26 +34,26 @@ CREATE OR REPLACE VIEW rg_name_view AS -- -- by default resgroup names have the form of [_a-zA-Z][_a-zA-Z0-9]* -CREATE RESOURCE GROUP rgNameTest01 WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP rgNameTest01 WITH (cpu_hard_quota_limit=10); ALTER RESOURCE GROUP rgNameTest01 SET concurrency 2; SELECT * FROM rg_name_view; DROP RESOURCE GROUP rgNameTest01; -CREATE RESOURCE GROUP __rg_name_test_01__ WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP __rg_name_test_01__ WITH (cpu_hard_quota_limit=10); ALTER RESOURCE GROUP __rg_name_test_01__ SET concurrency 2; SELECT * FROM rg_name_view; DROP RESOURCE GROUP 
__rg_name_test_01__; -- min length is 1 character -CREATE RESOURCE GROUP Z WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP Z WITH (cpu_hard_quota_limit=10); DROP RESOURCE GROUP Z; -- max length is 63 characters -CREATE RESOURCE GROUP max012345678901234567890123456789012345678901234567890123456789 WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP max012345678901234567890123456789012345678901234567890123456789 WITH (cpu_hard_quota_limit=10); ALTER RESOURCE GROUP max012345678901234567890123456789012345678901234567890123456789 SET concurrency 2; SELECT * FROM rg_name_view; DROP RESOURCE GROUP max012345678901234567890123456789012345678901234567890123456789; -- characters exceed the max length are ignored -CREATE RESOURCE GROUP max012345678901234567890123456789012345678901234567890123456789further WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP max012345678901234567890123456789012345678901234567890123456789further WITH (cpu_hard_quota_limit=10); ALTER RESOURCE GROUP max012345678901234567890123456789012345678901234567890123456789are SET concurrency 2; SELECT * FROM rg_name_view; DROP RESOURCE GROUP max012345678901234567890123456789012345678901234567890123456789ignored; @@ -62,7 +62,7 @@ DROP RESOURCE GROUP max0123456789012345678901234567890123456789012345678901234 -- white spaces CREATE RESOURCE GROUP "newlines s p a c e s -t a b s" WITH (cpu_rate_limit=10, memory_limit=10); +t a b s" WITH (cpu_hard_quota_limit=10); ALTER RESOURCE GROUP "newlines s p a c e s t a b s" SET concurrency 2; @@ -71,39 +71,39 @@ DROP RESOURCE GROUP "newlines s p a c e s t a b s"; -- punctuations -CREATE RESOURCE GROUP "!#$%&`()*+,-./:;<=>?@[]^_{|}~" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "!#$%&`()*+,-./:;<=>?@[]^_{|}~" WITH (cpu_hard_quota_limit=10); ALTER RESOURCE GROUP "!#$%&`()*+,-./:;<=>?@[]^_{|}~" SET concurrency 2; SELECT * FROM rg_name_view; DROP RESOURCE GROUP "!#$%&`()*+,-./:;<=>?@[]^_{|}~"; -- quotation 
marks -CREATE RESOURCE GROUP "'' are 2 single quotation marks" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "'' are 2 single quotation marks" WITH (cpu_hard_quota_limit=10); ALTER RESOURCE GROUP "'' are 2 single quotation marks" SET concurrency 2; SELECT * FROM rg_name_view; DROP RESOURCE GROUP "'' are 2 single quotation marks"; -CREATE RESOURCE GROUP """ is 1 double quotation mark" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP """ is 1 double quotation mark" WITH (cpu_hard_quota_limit=10); ALTER RESOURCE GROUP """ is 1 double quotation mark" SET concurrency 2; SELECT * FROM rg_name_view; DROP RESOURCE GROUP """ is 1 double quotation mark"; -- nothing special with leading character -CREATE RESOURCE GROUP "0 as prefix" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "0 as prefix" WITH (cpu_hard_quota_limit=10); ALTER RESOURCE GROUP "0 as prefix" SET concurrency 2; SELECT * FROM rg_name_view; DROP RESOURCE GROUP "0 as prefix"; -CREATE RESOURCE GROUP " leading space" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP " leading space" WITH (cpu_hard_quota_limit=10); ALTER RESOURCE GROUP " leading space" SET concurrency 2; SELECT * FROM rg_name_view; DROP RESOURCE GROUP " leading space"; -- backslash is not used as the escape character -CREATE RESOURCE GROUP "\\ are two backslashes" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "\\ are two backslashes" WITH (cpu_hard_quota_limit=10); ALTER RESOURCE GROUP "\\ are two backslashes" SET concurrency 2; SELECT * FROM rg_name_view; DROP RESOURCE GROUP "\\ are two backslashes"; -- below are octal, hex and unicode representations of "rg1" -CREATE RESOURCE GROUP "\o162\o147\o61" WITH (cpu_rate_limit=10, memory_limit=10); -CREATE RESOURCE GROUP "\x72\x67\x31" WITH (cpu_rate_limit=10, memory_limit=10); -CREATE RESOURCE GROUP "\u0072\u0067\u0031" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "\o162\o147\o61" WITH 
(cpu_hard_quota_limit=10); +CREATE RESOURCE GROUP "\x72\x67\x31" WITH (cpu_hard_quota_limit=10); +CREATE RESOURCE GROUP "\u0072\u0067\u0031" WITH (cpu_hard_quota_limit=10); ALTER RESOURCE GROUP "\o162\o147\o61" SET concurrency 2; ALTER RESOURCE GROUP "\x72\x67\x31" SET concurrency 2; ALTER RESOURCE GROUP "\u0072\u0067\u0031" SET concurrency 2; @@ -117,35 +117,35 @@ DROP RESOURCE GROUP "\x72\x67\x31"; DROP RESOURCE GROUP "\u0072\u0067\u0031"; -- unicode escapes are supported -CREATE RESOURCE GROUP U&"\0441\043B\043E\043D" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP U&"\0441\043B\043E\043D" WITH (cpu_hard_quota_limit=10); ALTER RESOURCE GROUP U&"\0441\043B\043E\043D" SET concurrency 2; SELECT * FROM rg_name_view; DROP RESOURCE GROUP U&"\0441\043B\043E\043D"; -- unicode representation of "rg1" -CREATE RESOURCE GROUP U&"\0072\0067\0031" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP U&"\0072\0067\0031" WITH (cpu_hard_quota_limit=10); ALTER RESOURCE GROUP "rg1" SET concurrency 2; SELECT * FROM rg_name_view; DROP RESOURCE GROUP "rg1"; -- CJK characters are allowed with or without double quotation marks -CREATE RESOURCE GROUP 资源组 WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP 资源组 WITH (cpu_hard_quota_limit=10); ALTER RESOURCE GROUP "资源组" SET concurrency 2; SELECT * FROM rg_name_view; DROP RESOURCE GROUP 资源组; -CREATE RESOURCE GROUP リソース・グループ WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP リソース・グループ WITH (cpu_hard_quota_limit=10); ALTER RESOURCE GROUP "リソース・グループ" SET concurrency 2; SELECT * FROM rg_name_view; DROP RESOURCE GROUP リソース・グループ; -CREATE RESOURCE GROUP 자원그룹 WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP 자원그룹 WITH (cpu_hard_quota_limit=10); ALTER RESOURCE GROUP "자원그룹" SET concurrency 2; SELECT * FROM rg_name_view; DROP RESOURCE GROUP 자원그룹; -- names are case sensitive, -- but are always converted to lower case unless around with quotation marks -CREATE RESOURCE GROUP 
"RG_NAME_TEST" WITH (cpu_rate_limit=10, memory_limit=10); -CREATE RESOURCE GROUP rg_Name_Test WITH (cpu_rate_limit=10, memory_limit=10); -CREATE RESOURCE GROUP "rg_name_test" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "RG_NAME_TEST" WITH (cpu_hard_quota_limit=10); +CREATE RESOURCE GROUP rg_Name_Test WITH (cpu_hard_quota_limit=10); +CREATE RESOURCE GROUP "rg_name_test" WITH (cpu_hard_quota_limit=10); ALTER RESOURCE GROUP Rg_NaMe_TeSt SET concurrency 2; ALTER RESOURCE GROUP "RG_NAME_TEST" SET concurrency 2; SELECT * FROM rg_name_view; @@ -154,36 +154,36 @@ DROP RESOURCE GROUP RG_nAME_tEST; -- reserved names are all lower case: "default_group", "admin_group", "none", -- they can be used by users with at least one upper case character. -CREATE RESOURCE GROUP "None" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "None" WITH (cpu_hard_quota_limit=10); ALTER RESOURCE GROUP "None" SET concurrency 2; SELECT * FROM rg_name_view; DROP RESOURCE GROUP "None"; -CREATE RESOURCE GROUP "NONE" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "NONE" WITH (cpu_hard_quota_limit=10); ALTER RESOURCE GROUP "NONE" SET concurrency 2; SELECT * FROM rg_name_view; DROP RESOURCE GROUP "NONE"; -CREATE RESOURCE GROUP "DEFAULT_GROup" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "DEFAULT_GROup" WITH (cpu_hard_quota_limit=10); ALTER RESOURCE GROUP "DEFAULT_GROup" SET concurrency 2; SELECT * FROM rg_name_view; DROP RESOURCE GROUP "DEFAULT_GROup"; -CREATE RESOURCE GROUP "ADMIN_GROUP" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "ADMIN_GROUP" WITH (cpu_hard_quota_limit=10); ALTER RESOURCE GROUP "ADMIN_GROUP" SET concurrency 2; SELECT * FROM rg_name_view; DROP RESOURCE GROUP "ADMIN_GROUP"; -CREATE RESOURCE GROUP "with" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "with" WITH (cpu_hard_quota_limit=10); ALTER RESOURCE GROUP "with" SET concurrency 2; SELECT * FROM rg_name_view; DROP 
RESOURCE GROUP "with"; -CREATE RESOURCE GROUP "WITH" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "WITH" WITH (cpu_hard_quota_limit=10); ALTER RESOURCE GROUP "WITH" SET concurrency 2; SELECT * FROM rg_name_view; DROP RESOURCE GROUP "WITH"; -CREATE RESOURCE GROUP "group" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "group" WITH (cpu_hard_quota_limit=10); ALTER RESOURCE GROUP "group" SET concurrency 2; SELECT * FROM rg_name_view; DROP RESOURCE GROUP "group"; -CREATE RESOURCE GROUP "create" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "create" WITH (cpu_hard_quota_limit=10); ALTER RESOURCE GROUP "create" SET concurrency 2; SELECT * FROM rg_name_view; DROP RESOURCE GROUP "create"; @@ -193,28 +193,28 @@ DROP RESOURCE GROUP "create"; -- -- does not support single quotation marks around the name -CREATE RESOURCE GROUP 'must_fail' WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP 'must_fail' WITH (cpu_hard_quota_limit=10); -- does not support leading numbers -CREATE RESOURCE GROUP 0_must_fail WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP 0_must_fail WITH (cpu_hard_quota_limit=10); -- reserved names are not allowed even with double quotation marks -CREATE RESOURCE GROUP "default_group" WITH (cpu_rate_limit=10, memory_limit=10); -CREATE RESOURCE GROUP "admin_group" WITH (cpu_rate_limit=10, memory_limit=10); -CREATE RESOURCE GROUP "none" WITH (cpu_rate_limit=10, memory_limit=10); -CREATE RESOURCE GROUP default_group WITH (cpu_rate_limit=10, memory_limit=10); -CREATE RESOURCE GROUP admin_group WITH (cpu_rate_limit=10, memory_limit=10); -CREATE RESOURCE GROUP none WITH (cpu_rate_limit=10, memory_limit=10); -CREATE RESOURCE GROUP DEFAULT_GROUP WITH (cpu_rate_limit=10, memory_limit=10); -CREATE RESOURCE GROUP Admin_Group WITH (cpu_rate_limit=10, memory_limit=10); -CREATE RESOURCE GROUP NONE WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "default_group" WITH 
(cpu_hard_quota_limit=10); +CREATE RESOURCE GROUP "admin_group" WITH (cpu_hard_quota_limit=10); +CREATE RESOURCE GROUP "none" WITH (cpu_hard_quota_limit=10); +CREATE RESOURCE GROUP default_group WITH (cpu_hard_quota_limit=10); +CREATE RESOURCE GROUP admin_group WITH (cpu_hard_quota_limit=10); +CREATE RESOURCE GROUP none WITH (cpu_hard_quota_limit=10); +CREATE RESOURCE GROUP DEFAULT_GROUP WITH (cpu_hard_quota_limit=10); +CREATE RESOURCE GROUP Admin_Group WITH (cpu_hard_quota_limit=10); +CREATE RESOURCE GROUP NONE WITH (cpu_hard_quota_limit=10); -- keywords are not allowed without quotation marks -CREATE RESOURCE GROUP with WITH (cpu_rate_limit=10, memory_limit=10); -CREATE RESOURCE GROUP WITH WITH (cpu_rate_limit=10, memory_limit=10); -CREATE RESOURCE GROUP group WITH (cpu_rate_limit=10, memory_limit=10); -CREATE RESOURCE GROUP CREATE WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP with WITH (cpu_hard_quota_limit=10); +CREATE RESOURCE GROUP WITH WITH (cpu_hard_quota_limit=10); +CREATE RESOURCE GROUP group WITH (cpu_hard_quota_limit=10); +CREATE RESOURCE GROUP CREATE WITH (cpu_hard_quota_limit=10); -- min length is 1 character -CREATE RESOURCE GROUP "" WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP "" WITH (cpu_hard_quota_limit=10); diff --git a/src/test/isolation2/sql/resgroup/resgroup_operator_memory.sql b/src/test/isolation2/sql/resgroup/resgroup_operator_memory.sql deleted file mode 100644 index 06227192cf4..00000000000 --- a/src/test/isolation2/sql/resgroup/resgroup_operator_memory.sql +++ /dev/null @@ -1,193 +0,0 @@ -SET optimizer TO off; - --- --- setup --- - ---start_ignore -DROP VIEW IF EXISTS many_ops; -DROP ROLE r1_opmem_test; -DROP RESOURCE GROUP rg1_opmem_test; -DROP RESOURCE GROUP rg2_opmem_test; -CREATE LANGUAGE plpython3u; ---end_ignore - --- a helper function to run query via SPI -CREATE OR REPLACE FUNCTION f1_opmem_test() RETURNS void AS $$ - plpy.execute("""select * from gp_dist_random('gp_id')""") -$$ LANGUAGE 
plpython3u; - --- this view contains many operators in the plan, which is used to trigger --- the issue. gp_toolkit.gp_resgroup_config is a large JOIN view of many --- relations, to prevent the source relations being optimized out from the plan --- we have to keep the columns provided by them in the target list, instead of --- composing a long SELECT c1,c2,... list we use SELECT * here, but we should --- not output the groupid as it changes each time. --- --- hashagg may not work with a small operator memory, so we use UNION ALL --- instead of UNION to prevent putting a hashagg on top of the append node, --- and we use a always-false WHERE condition to prevent too much output. -CREATE OR REPLACE VIEW many_ops AS - SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 -UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 -UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 -UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 -UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 -UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 -UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 -UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 -UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 -UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 -UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 -UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 -UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 -UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 -UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 -UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 -UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 -UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 -UNION 
ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 -UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 -; - --- we must ensure spill to be small enough but still > 0. --- - rg1's memory quota is 682 * 1% = 6; --- - per-xact quota is 6/3=2; --- - spill memory is 2 * 60% = 1; -CREATE RESOURCE GROUP rg1_opmem_test - WITH (cpu_rate_limit=10, memory_limit=1, memory_shared_quota=0, - concurrency=3, memory_spill_ratio=60); - -CREATE ROLE r1_opmem_test RESOURCE GROUP rg1_opmem_test; -GRANT ALL ON many_ops TO r1_opmem_test; - --- rg1 has very low per-xact memory quota, there will be no enough operator --- memory reserved, however in resource group mode we assign at least 100KB to --- each operator, no matter it is memory intensive or not. As long as there is --- enough shared memory the query should be executed successfully. - --- --- positive: there is enough global shared memory --- - -SET gp_resgroup_memory_policy TO none; -SET ROLE TO r1_opmem_test; -SELECT * FROM many_ops; -RESET role; - -SET gp_resgroup_memory_policy TO eager_free; -SET ROLE TO r1_opmem_test; -SELECT * FROM many_ops; -RESET role; - -SET gp_resgroup_memory_policy TO auto; -SET ROLE TO r1_opmem_test; -SELECT * FROM many_ops; -RESET role; - --- --- negative: there is not enough shared memory --- - --- rg1 has no group level shared memory, and most memory are granted to rg2, --- there is only very little global shared memory due to integer rounding. -CREATE RESOURCE GROUP rg2_opmem_test - WITH (cpu_rate_limit=10, memory_limit=59); - --- this query can execute but will raise OOM error. 
- -SET gp_resgroup_memory_policy TO none; -SET ROLE TO r1_opmem_test; -SELECT * FROM many_ops; -RESET role; - -SET gp_resgroup_memory_policy TO eager_free; -SET ROLE TO r1_opmem_test; -SELECT * FROM many_ops; -RESET role; - -SET gp_resgroup_memory_policy TO auto; -SET ROLE TO r1_opmem_test; -SELECT * FROM many_ops; -RESET role; - --- --- positive: there is enough group shared memory --- - -ALTER RESOURCE GROUP rg2_opmem_test SET memory_limit 40; -ALTER RESOURCE GROUP rg1_opmem_test SET memory_limit 20; -ALTER RESOURCE GROUP rg1_opmem_test SET memory_shared_quota 100; -ALTER RESOURCE GROUP rg1_opmem_test SET memory_spill_ratio 20; - -SET gp_resgroup_memory_policy TO none; -SET ROLE TO r1_opmem_test; -SELECT * FROM many_ops; -RESET role; - -SET gp_resgroup_memory_policy TO eager_free; -SET ROLE TO r1_opmem_test; -SELECT * FROM many_ops; -RESET role; - -SET gp_resgroup_memory_policy TO auto; -SET ROLE TO r1_opmem_test; -SELECT * FROM many_ops; -RESET role; - --- --- positive: the spill memory is large enough, no adjustment is needed --- - -DROP RESOURCE GROUP rg2_opmem_test; -ALTER RESOURCE GROUP rg1_opmem_test SET memory_limit 40; -ALTER RESOURCE GROUP rg1_opmem_test SET memory_shared_quota 50; -ALTER RESOURCE GROUP rg1_opmem_test SET memory_spill_ratio 30; -ALTER RESOURCE GROUP rg1_opmem_test SET concurrency 1; - -SET gp_resgroup_memory_policy TO none; -SET ROLE TO r1_opmem_test; -SELECT * FROM many_ops; -RESET role; - -SET gp_resgroup_memory_policy TO eager_free; -SET ROLE TO r1_opmem_test; -SELECT * FROM many_ops; -RESET role; - -SET gp_resgroup_memory_policy TO auto; -SET ROLE TO r1_opmem_test; -SELECT * FROM many_ops; -RESET role; - --- --- positive: when spill memory is zero, work memory is used --- - -ALTER RESOURCE GROUP rg1_opmem_test SET memory_spill_ratio 0; - -SET gp_resgroup_memory_policy TO none; -SET ROLE TO r1_opmem_test; -SELECT * FROM many_ops; -SELECT f1_opmem_test(); -RESET role; - -SET gp_resgroup_memory_policy TO eager_free; -SET ROLE TO 
r1_opmem_test; -SELECT * FROM many_ops; -SELECT f1_opmem_test(); -RESET role; - -SET gp_resgroup_memory_policy TO auto; -SET ROLE TO r1_opmem_test; -SELECT * FROM many_ops; -SELECT f1_opmem_test(); -RESET role; - --- --- cleanup --- - -DROP VIEW many_ops; -DROP ROLE r1_opmem_test; -DROP RESOURCE GROUP rg1_opmem_test; diff --git a/src/test/isolation2/sql/resgroup/resgroup_parallel_queries.sql b/src/test/isolation2/sql/resgroup/resgroup_parallel_queries.sql index a0748913dd1..685fd199f14 100644 --- a/src/test/isolation2/sql/resgroup/resgroup_parallel_queries.sql +++ b/src/test/isolation2/sql/resgroup/resgroup_parallel_queries.sql @@ -78,12 +78,9 @@ LANGUAGE 'plpgsql'; 5:select dblink_connect('dblink_rg_test5', 'dbname=isolation2resgrouptest'); 6:select dblink_connect('dblink_rg_test6', 'dbname=isolation2resgrouptest'); -1>:select exec_commands_n('dblink_rg_test1','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, cpu_rate_limit=#, memory_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER RESOURCE GROUP rg_test_g# set concurrency #', 60, '', '1-6', false); -2>:select exec_commands_n('dblink_rg_test2','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, cpu_rate_limit=#, memory_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER RESOURCE GROUP rg_test_g# set concurrency#', 60, '', '1-6', false); -3>:select exec_commands_n('dblink_rg_test3','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, cpu_rate_limit=#, memory_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER RESOURCE GROUP rg_test_g# set cpu_rate_limit #', 60, '', '1-6', false); -4>:select exec_commands_n('dblink_rg_test4','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, cpu_rate_limit=#, memory_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER RESOURCE GROUP rg_test_g# set memory_limit #', 60, '', '1-6', false); -5>:select exec_commands_n('dblink_rg_test5','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, cpu_rate_limit=#, memory_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER 
RESOURCE GROUP rg_test_g# set memory_shared_quota #', 60, '', '1-6', false); -6>:select exec_commands_n('dblink_rg_test6','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, cpu_rate_limit=#, memory_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER RESOURCE GROUP rg_test_g# set memory_limit #', 60, '', '1-6', false); +1>:select exec_commands_n('dblink_rg_test1','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, cpu_hard_quota_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER RESOURCE GROUP rg_test_g# set concurrency #', 60, '', '1-6', false); +2>:select exec_commands_n('dblink_rg_test2','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, cpu_hard_quota_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER RESOURCE GROUP rg_test_g# set concurrency#', 60, '', '1-6', false); +3>:select exec_commands_n('dblink_rg_test3','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, cpu_hard_quota_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER RESOURCE GROUP rg_test_g# set cpu_hard_quota_limit #', 60, '', '1-6', false); 1<: 2<: @@ -122,14 +119,14 @@ select exec_commands_n('dblink_rg_test','DROP RESOURCE GROUP rg_test_g%', '', '' -- end_ignore -- create 6 roles and 6 resource groups -select exec_commands_n('dblink_rg_test','CREATE RESOURCE GROUP rg_test_g% WITH (concurrency=9, cpu_rate_limit=1, memory_limit=7)', '', '', 6, '1-6', '', true); +select exec_commands_n('dblink_rg_test','CREATE RESOURCE GROUP rg_test_g% WITH (concurrency=9, cpu_hard_quota_limit=1)', '', '', 6, '1-6', '', true); select exec_commands_n('dblink_rg_test','CREATE ROLE rg_test_r% login resource group rg_test_g%;', '', '', 6, '1-6', '', true); select exec_commands_n('dblink_rg_test','GRANT ALL ON rg_test_foo to rg_test_r%;', '', '', 6, '1-6', '', true); select exec_commands_n('dblink_rg_test','GRANT ALL ON rg_test_bar to rg_test_r%;', '', '', 6, '1-6', '', true); select dblink_disconnect('dblink_rg_test'); -select groupname, concurrency, cpu_rate_limit from gp_toolkit.gp_resgroup_config 
where groupname like 'rg_test_g%' order by groupname; +select groupname, concurrency, cpu_hard_quota_limit from gp_toolkit.gp_resgroup_config where groupname like 'rg_test_g%' order by groupname; -- -- 2* : DMLs @@ -161,17 +158,10 @@ select groupname, concurrency, cpu_rate_limit from gp_toolkit.gp_resgroup_config 31: select dblink_connect('dblink_rg_test31', 'dbname=isolation2resgrouptest'); 31>: select exec_commands_n('dblink_rg_test31', 'alter resource group rg_test_g% set concurrency #', 'select 1 from pg_sleep(0.1)', '', 1000, '1-6', '0-5', true); --- start a new session to alter cpu_rate_limit randomly +-- start a new session to alter cpu_hard_quota_limit randomly 32: select dblink_connect('dblink_rg_test32', 'dbname=isolation2resgrouptest'); -32>: select exec_commands_n('dblink_rg_test32', 'alter resource group rg_test_g% set cpu_rate_limit #', 'select 1 from pg_sleep(0.1)', '', 1000, '1-6', '1-6', true); +32>: select exec_commands_n('dblink_rg_test32', 'alter resource group rg_test_g% set cpu_hard_quota_limit #', 'select 1 from pg_sleep(0.1)', '', 1000, '1-6', '1-6', true); --- start a new session to alter memory_limit randomly -33: select dblink_connect('dblink_rg_test33', 'dbname=isolation2resgrouptest'); -33>: select exec_commands_n('dblink_rg_test33', 'alter resource group rg_test_g% set memory_limit #', 'select 1 from pg_sleep(0.1)', '', 1000, '1-6', '1-7', true); - --- start a new session to alter memory_shared_quota randomly -34: select dblink_connect('dblink_rg_test34', 'dbname=isolation2resgrouptest'); -34>: select exec_commands_n('dblink_rg_test34', 'alter resource group rg_test_g% set memory_shared_quota #', 'select 1 from pg_sleep(0.1)', '', 1000, '1-6', '1-80', true); -- -- 4* : CREATE/DROP tables & groups @@ -182,7 +172,7 @@ select groupname, concurrency, cpu_rate_limit from gp_toolkit.gp_resgroup_config -- start a new session to create & drop resource group 42: select dblink_connect('dblink_rg_test42', 'dbname=isolation2resgrouptest'); -42>: 
select exec_commands_n('dblink_rg_test42', 'create resource group rg_test_g7 with (cpu_rate_limit=1, memory_limit=1)', 'drop resource group rg_test_g7', '', 1000, '', '', true); +42>: select exec_commands_n('dblink_rg_test42', 'create resource group rg_test_g7 with (cpu_hard_quota_limit=1)', 'drop resource group rg_test_g7', '', 1000, '', '', true); 31<: 31: select exec_commands_n('dblink_rg_test31', 'alter resource group rg_test_g% set concurrency #', 'select 1 from pg_sleep(0.1)', '', 6, '1-6', '1-5', true); @@ -232,18 +222,11 @@ select groupname, concurrency, cpu_rate_limit from gp_toolkit.gp_resgroup_config 41q: 42q: -select groupname, concurrency::int < 7, cpu_rate_limit::int < 7 from gp_toolkit.gp_resgroup_config where groupname like 'rg_test_g%' order by groupname; +select groupname, concurrency::int < 7, cpu_hard_quota_limit::int < 7 from gp_toolkit.gp_resgroup_config where groupname like 'rg_test_g%' order by groupname; -- Beacuse concurrency of each resource group is changed between 1..6, so the num_queued must be larger than 0 select num_queued > 0 from gp_toolkit.gp_resgroup_status where rsgname like 'rg_test_g%' order by rsgname; --- After all queries finished in each resource group, the memory_usage should be zero, no memory leak -with t_1 as -( - select rsgname, row_to_json(json_each(memory_usage::json)) as j from gp_toolkit.gp_resgroup_status where rsgname like 'rg_test_g%' order by rsgname -) -select rsgname, sum(((j->'value')->>'used')::int) from t_1 group by rsgname ; - -- start_ignore drop table rg_test_foo; drop table rg_test_bar; @@ -256,7 +239,7 @@ select dblink_disconnect('dblink_rg_test'); -- -- 5*: Test connections in utility mode are not governed by resource group -- -create resource group rg_test_g8 with (concurrency= 1, cpu_rate_limit=1, memory_limit=1); +create resource group rg_test_g8 with (concurrency= 1, cpu_hard_quota_limit=1); create role rg_test_r8 login resource group rg_test_g8; 51:select dblink_connect('dblink_rg_test51', 
'dbname=isolation2resgrouptest user=rg_test_r8 options=''-c gp_role=utility'''); 52:select dblink_connect('dblink_rg_test52', 'dbname=isolation2resgrouptest user=rg_test_r8 options=''-c gp_role=utility'''); diff --git a/src/test/isolation2/sql/resgroup/resgroup_query_mem.sql b/src/test/isolation2/sql/resgroup/resgroup_query_mem.sql deleted file mode 100644 index b047fb99304..00000000000 --- a/src/test/isolation2/sql/resgroup/resgroup_query_mem.sql +++ /dev/null @@ -1,63 +0,0 @@ --- This test is to verify that query_mem is set correctly in QEs. --- Previously, resgroup does not consider that different number of --- segments among coordinator and segments. Now we let QEs to re-calculate --- query_mem in each segment locally. This test case use the following --- steps to verify the new method's correctness: --- 1. fetch available memory in coordinator and a single segment, --- compute the ratio --- 2. use fault inject and plpython invokes pygresql with notice, --- get a distributed plan's sort's operator memory in a QE --- 3. Get sort's operator memory in a pure QD's plan (catalog order by) --- 4. compute the ratio of two operator memorys --- 5. these two ratios should be the same. - -create extension if not exists gp_inject_fault; -create or replace language plpython3u; - -create table t_qmem(a int); -select gp_inject_fault('rg_qmem_qd_qe', 'skip', dbid) from gp_segment_configuration where role = 'p' and content = 0; - -create function rg_qmem_test() returns boolean as $$ -from pg import DB -from copy import deepcopy -import re - -# 1: get resgroup available mem in QD and QE and compute ratio -sql = ("select memory_available m from " - "gp_toolkit.gp_resgroup_status_per_segment " - "where segment_id = %d and rsgname = 'admin_group'") -qd_mem = int(plpy.execute(sql % -1)[0]["m"]) -qe_mem = int(plpy.execute(sql % 0)[0]["m"]) -ratio1 = int(round(float(qd_mem) / qe_mem)) - -# 2. 
use notice to get qe operator mem -dbname = plpy.execute("select current_database() db")[0]["db"] -db = DB(dbname=dbname) -qe_opmem_info = [] -db.set_notice_receiver(lambda n: qe_opmem_info.append(deepcopy(n.message))) -sql = "select * from t_qmem order by 1" -db.query(sql) -qe_opmem = int(re.findall(r"op_mem=(\d+)", qe_opmem_info[0])[0]) -db.set_notice_receiver(None) - -# 3. get qd operator mem -sql = "explain analyze select * from pg_class order by relpages limit 10" -db.query("set gp_resgroup_print_operator_memory_limits = on;") -r = db.query(sql).getresult() -for (line, ) in r: - if "-> Sort" not in line: continue - qd_opmem = int(re.findall(r"operatorMem: (\d+)", line)[0]) - break - -db.close() - -ratio2 = int(round(float(qd_opmem) / qe_opmem)) - -return ratio1 == ratio2 - -$$ language plpython3u; - -select rg_qmem_test(); -select gp_inject_fault('rg_qmem_qd_qe', 'reset', dbid) from gp_segment_configuration where role = 'p' and content = 0; -drop function rg_qmem_test(); -drop table t_qmem; diff --git a/src/test/isolation2/sql/resgroup/resgroup_recreate.sql b/src/test/isolation2/sql/resgroup/resgroup_recreate.sql index d49bbd3025b..4e6cc898aa8 100644 --- a/src/test/isolation2/sql/resgroup/resgroup_recreate.sql +++ b/src/test/isolation2/sql/resgroup/resgroup_recreate.sql @@ -3,8 +3,7 @@ DROP ROLE IF EXISTS r1; DROP RESOURCE GROUP rg1; -- end_ignore -CREATE RESOURCE GROUP rg1 WITH (concurrency=2, cpu_rate_limit=10, - memory_limit=50, memory_shared_quota=0); +CREATE RESOURCE GROUP rg1 WITH (concurrency=2, cpu_hard_quota_limit=10); CREATE ROLE r1 RESOURCE GROUP rg1; 1: SET ROLE r1; @@ -13,8 +12,7 @@ CREATE ROLE r1 RESOURCE GROUP rg1; ALTER ROLE r1 RESOURCE GROUP none; DROP RESOURCE GROUP rg1; -CREATE RESOURCE GROUP rg1 WITH (concurrency=2, cpu_rate_limit=10, - memory_limit=50, memory_shared_quota=0); +CREATE RESOURCE GROUP rg1 WITH (concurrency=2, cpu_hard_quota_limit=10); ALTER ROLE r1 RESOURCE GROUP rg1; 1: BEGIN; diff --git 
a/src/test/isolation2/sql/resgroup/resgroup_seg_down_2pc.sql b/src/test/isolation2/sql/resgroup/resgroup_seg_down_2pc.sql index dc74ee25cb7..1a9da4fb260 100644 --- a/src/test/isolation2/sql/resgroup/resgroup_seg_down_2pc.sql +++ b/src/test/isolation2/sql/resgroup/resgroup_seg_down_2pc.sql @@ -10,7 +10,7 @@ alter system set gp_fts_probe_interval to 10; alter system set gp_fts_probe_retries to 0; select pg_reload_conf(); -1:create resource group rgroup_seg_down with (CPU_RATE_LIMIT=35, MEMORY_LIMIT=35, CONCURRENCY=10); +1:create resource group rgroup_seg_down with (cpu_hard_quota_limit=35, CONCURRENCY=10); -- inject an error in function dtm_broadcast_commit_prepared, that is before QD broadcasts commit prepared command to QEs 2:select gp_inject_fault_infinite('dtm_broadcast_commit_prepared', 'suspend', dbid) from gp_segment_configuration where role='p' and content=-1; diff --git a/src/test/isolation2/sql/resgroup/resgroup_set_memory_spill_ratio.sql b/src/test/isolation2/sql/resgroup/resgroup_set_memory_spill_ratio.sql deleted file mode 100644 index c29bbde7a0e..00000000000 --- a/src/test/isolation2/sql/resgroup/resgroup_set_memory_spill_ratio.sql +++ /dev/null @@ -1,83 +0,0 @@ --- This query must be the first one in this case. --- SHOW command will be bypassed in resgroup, when it's the first command --- in a connection it needs special handling to show memory_spill_ratio --- correctly. Verify that it shows the correct value 10 instead of default 20. 
-SHOW memory_spill_ratio; - ---start_ignore -DROP ROLE role1_spill_test; -DROP ROLE role2_spill_test; -DROP RESOURCE GROUP rg1_spill_test; -DROP RESOURCE GROUP rg2_spill_test; ---end_ignore - -CREATE RESOURCE GROUP rg1_spill_test WITH - (CONCURRENCY=10, MEMORY_LIMIT=10, CPU_RATE_LIMIT=10, memory_shared_quota=20, memory_spill_ratio=30); -CREATE RESOURCE GROUP rg2_spill_test WITH - (CONCURRENCY=10, MEMORY_LIMIT=10, CPU_RATE_LIMIT=10, memory_shared_quota=50, memory_spill_ratio=10); -CREATE ROLE role1_spill_test RESOURCE GROUP rg1_spill_test; -CREATE ROLE role2_spill_test RESOURCE GROUP rg2_spill_test; - --- positive set to resource group level ---start_ignore -SET ROLE role1_spill_test; -SHOW MEMORY_SPILL_RATIO; -SELECT 1; ---end_ignore - --- positive set to session level -SET MEMORY_SPILL_RATIO TO 70; -SHOW MEMORY_SPILL_RATIO; -SELECT 1; - --- positive fallback to statement_mem at session level -SET MEMORY_SPILL_RATIO TO 0; -SHOW MEMORY_SPILL_RATIO; -SELECT 1; - --- negative set to session level -SET MEMORY_SPILL_RATIO TO 101; -SHOW MEMORY_SPILL_RATIO; -SELECT 1; - --- positive set to session level -SET MEMORY_SPILL_RATIO TO 90; -SHOW MEMORY_SPILL_RATIO; -SELECT 1; - --- positive set to session level -SET MEMORY_SPILL_RATIO TO 20; -SHOW MEMORY_SPILL_RATIO; -SELECT 1; - --- reset to resource group level -RESET MEMORY_SPILL_RATIO; -SHOW MEMORY_SPILL_RATIO; -SELECT 1; - --- positive set to session level -SET MEMORY_SPILL_RATIO TO 60; -SHOW MEMORY_SPILL_RATIO; -SELECT 1; - --- change role, positive for session level -SET ROLE role2_spill_test; -SHOW MEMORY_SPILL_RATIO; -SELECT 1; - --- positive set to session level -SET MEMORY_SPILL_RATIO TO 20; -SHOW MEMORY_SPILL_RATIO; -SELECT 1; - --- reset to resource group level -RESET MEMORY_SPILL_RATIO; -SHOW MEMORY_SPILL_RATIO; -SELECT 1; - --- cleanup -RESET ROLE; -DROP ROLE role1_spill_test; -DROP ROLE role2_spill_test; -DROP RESOURCE GROUP rg1_spill_test; -DROP RESOURCE GROUP rg2_spill_test; diff --git 
a/src/test/isolation2/sql/resgroup/resgroup_syntax.sql b/src/test/isolation2/sql/resgroup/resgroup_syntax.sql index bf0937b8663..6211902db63 100644 --- a/src/test/isolation2/sql/resgroup/resgroup_syntax.sql +++ b/src/test/isolation2/sql/resgroup/resgroup_syntax.sql @@ -50,43 +50,46 @@ SELECT * FROM gp_toolkit.gp_resgroup_config; -- negative -- can't create the reserved resource groups -CREATE RESOURCE GROUP default_group WITH (cpu_rate_limit=10, memory_limit=10); -CREATE RESOURCE GROUP admin_group WITH (cpu_rate_limit=10, memory_limit=10); -CREATE RESOURCE GROUP none WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP default_group WITH (cpu_hard_quota_limit=10); +CREATE RESOURCE GROUP admin_group WITH (cpu_hard_quota_limit=10); +CREATE RESOURCE GROUP none WITH (cpu_hard_quota_limit=10); + -- multiple resource groups can't share the same name -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10); -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=10); +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=10); DROP RESOURCE GROUP rg_test_group; --- must specify cpu_rate_limit or cpuset -CREATE RESOURCE GROUP rg_test_group WITH (memory_limit=10); + -- can't specify the resource limit type multiple times -CREATE RESOURCE GROUP rg_test_group WITH (concurrency=1, cpu_rate_limit=5, memory_limit=5, concurrency=1); -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=5, memory_limit=5, cpu_rate_limit=5); -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=5, memory_limit=5, memory_limit=5); -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=5, memory_limit=5, memory_shared_quota=70, memory_shared_quota=80); -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0', cpuset='0', memory_limit=5); --- can't specify both cpu_rate_limit and cpuset -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=5, cpuset='0', 
memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (concurrency=1, cpu_hard_quota_limit=5, concurrency=1); +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=5, cpu_hard_quota_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0', cpuset='0'); + +-- can't specify both cpu_hard_quota_limit and cpuset +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=5, cpuset='0'); + +-- cpu_soft_priority can't be negative value +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=5, cpu_soft_priority=-100); + -- can't specify invalid cpuset -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='', memory_limit=5); -CREATE RESOURCE GROUP rg_test_group WITH (cpuset=',', memory_limit=5); -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='-', memory_limit=5); -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='a', memory_limit=5); -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='12a', memory_limit=5); -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0-,', memory_limit=5); -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='-1', memory_limit=5); -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='3-1', memory_limit=5); -CREATE RESOURCE GROUP rg_test_group WITH (cpuset=' 0 ', memory_limit=5); -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='4;a', memory_limit=5); -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='-;4', memory_limit=5); -CREATE RESOURCE GROUP rg_test_group WITH (cpuset=';5', memory_limit=5); -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='5;', memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset=''); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset=','); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='-'); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='a'); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='12a'); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0-,'); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='-1'); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='3-1'); +CREATE 
RESOURCE GROUP rg_test_group WITH (cpuset=' 0 '); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='4;a'); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='-;4'); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset=';5'); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='5;'); ---- suppose the core numbered 1024 is not exist -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='1024', memory_limit=5); -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,', memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='1024'); +CREATE RESOURCE GROUP rg_test_group WITH 
(cpuset='0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,'); -- can't alter to invalid cpuset -CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0', memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0'); ALTER RESOURCE GROUP rg_test_group set CPUSET ''; ALTER RESOURCE GROUP rg_test_group set CPUSET ','; ALTER RESOURCE GROUP rg_test_group set CPUSET '-'; @@ -115,125 +118,80 @@ DROP RESOURCE GROUP admin_group; DROP RESOURCE GROUP none; -- positive -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10); -SELECT groupname,concurrency,cpu_rate_limit,memory_limit,memory_shared_quota,memory_spill_ratio FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=10); +SELECT groupname,concurrency,cpu_hard_quota_limit, cpu_soft_priority FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; +DROP RESOURCE GROUP rg_test_group; +CREATE RESOURCE GROUP rg_test_group WITH (concurrency=1, 
cpuset='0'); +SELECT groupname,concurrency,cpu_hard_quota_limit, cpu_soft_priority FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; DROP RESOURCE GROUP rg_test_group; -CREATE RESOURCE GROUP rg_test_group WITH (concurrency=1, cpuset='0', memory_limit=10, memory_shared_quota=70, memory_spill_ratio=30); -SELECT groupname,concurrency,cpu_rate_limit,memory_limit,memory_shared_quota,memory_spill_ratio FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=10, cpu_soft_priority=1000); +SELECT groupname,concurrency,cpu_hard_quota_limit, cpu_soft_priority FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; DROP RESOURCE GROUP rg_test_group; -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10); -SELECT groupname,concurrency,cpu_rate_limit,memory_limit,memory_shared_quota,memory_spill_ratio FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=-1, cpu_soft_priority=1000); +SELECT groupname,concurrency,cpu_hard_quota_limit, cpu_soft_priority FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; +DROP RESOURCE GROUP rg_test_group; +CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0', cpu_soft_priority=1000); +SELECT groupname,concurrency,cpu_hard_quota_limit, cpu_soft_priority FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; DROP RESOURCE GROUP rg_test_group; CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0'); -SELECT groupname,concurrency,cpu_rate_limit,memory_limit,memory_shared_quota,memory_spill_ratio FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; +SELECT groupname,concurrency,cpu_hard_quota_limit,cpu_soft_priority FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; DROP RESOURCE GROUP rg_test_group; CREATE RESOURCE GROUP rg_test_group WITH (cpuset='0;0-1'); -SELECT 
groupname,concurrency,cpu_rate_limit,memory_limit,memory_shared_quota,memory_spill_ratio +SELECT groupname,concurrency,cpu_hard_quota_limit,cpu_soft_priority,cpuset FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; DROP RESOURCE GROUP rg_test_group; -- ---------------------------------------------------------------------- -- Test: boundary check in create resource group syntax -- ---------------------------------------------------------------------- --- negative: cpu_rate_limit & memory_limit should be in [1, 100] --- if cpu_rate_limit equals -1, it will not be involved in sum -CREATE RESOURCE GROUP rg_test_group1 WITH (memory_limit=10, cpuset='0'); -CREATE RESOURCE GROUP rg_test_group2 WITH (cpu_rate_limit=60, memory_limit=10); -CREATE RESOURCE GROUP rg_test_group3 WITH (cpu_rate_limit=1, memory_limit=10); -DROP RESOURCE GROUP rg_test_group1; -DROP RESOURCE GROUP rg_test_group2; -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=61, memory_limit=10); -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=91); -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=0, memory_limit=10); -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=-1); -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=0.9); -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=1.9); +-- negative: cpu_hard_quota_limit should be in [1, 100] +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=101); +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=0); + -- negative: concurrency should be in [1, max_connections] -CREATE RESOURCE GROUP rg_test_group WITH (concurrency=-1, cpu_rate_limit=10, memory_limit=10); -CREATE RESOURCE GROUP rg_test_group WITH (concurrency=26, cpu_rate_limit=10, memory_limit=10); --- negative: memory_auditor should be 'vmtracker' or 'cgroup' -CREATE RESOURCE GROUP rg_test_group WITH (concurrency=0, cpu_rate_limit=10, memory_limit=10, 
memory_auditor="randomtext"); --- negative: concurrency should be zero for cgroup audited resource group -CREATE RESOURCE GROUP rg_test_group WITH (concurrency=1, cpu_rate_limit=10, memory_limit=10, memory_auditor="cgroup"); +CREATE RESOURCE GROUP rg_test_group WITH (concurrency=-1, cpu_hard_quota_limit=10); +CREATE RESOURCE GROUP rg_test_group WITH (concurrency=26, cpu_hard_quota_limit=10); + -- negative: the cores of cpuset in different groups mustn't overlap -CREATE RESOURCE GROUP rg_test_group1 WITH (cpuset='0', memory_limit=10); -CREATE RESOURCE GROUP rg_test_group2 WITH (cpuset='0', memory_limit=10); +CREATE RESOURCE GROUP rg_test_group1 WITH (cpuset='0'); +CREATE RESOURCE GROUP rg_test_group2 WITH (cpuset='0'); DROP RESOURCE GROUP rg_test_group1; --- memory_spill_ratio range is [0, 100] --- no limit on the sum of memory_shared_quota and memory_spill_ratio -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_shared_quota=10, memory_spill_ratio=0); -DROP RESOURCE GROUP rg_test_group; -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_shared_quota=50, memory_spill_ratio=51); -DROP RESOURCE GROUP rg_test_group; -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_shared_quota=10, memory_spill_ratio=-1); -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_shared_quota=-1, memory_spill_ratio=10); +-- negative: cpu_soft_priority should be in [1, +∞] +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=10, cpu_soft_priority=0); +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=10, cpu_soft_priority=-1); +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=10, cpu_soft_priority=-1024); --- positive: cpu_rate_limit & memory_limit should be in [1, 100] -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=60, memory_limit=10); +-- positive: cpu_hard_quota_limit should be in [1, 100] +CREATE 
RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=60); DROP RESOURCE GROUP rg_test_group; -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=60); +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=1); DROP RESOURCE GROUP rg_test_group; -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=1, memory_limit=10); +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=10); DROP RESOURCE GROUP rg_test_group; -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=1); + +-- positive: cpu_soft_priority should be in [1, +∞] +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=10, cpu_soft_priority=100); +DROP RESOURCE GROUP rg_test_group; +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=10, cpu_soft_priority=10000); DROP RESOURCE GROUP rg_test_group; + -- positive: concurrency should be in [0, max_connections] -CREATE RESOURCE GROUP rg_test_group WITH (concurrency=0, cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP rg_test_group WITH (concurrency=0, cpu_hard_quota_limit=10); DROP RESOURCE GROUP rg_test_group; -CREATE RESOURCE GROUP rg_test_group WITH (concurrency=1, cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP rg_test_group WITH (concurrency=1, cpu_hard_quota_limit=10); DROP RESOURCE GROUP rg_test_group; -CREATE RESOURCE GROUP rg_test_group WITH (concurrency=25, cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP rg_test_group WITH (concurrency=25, cpu_hard_quota_limit=10); DROP RESOURCE GROUP rg_test_group; -CREATE RESOURCE GROUP rg1_test_group WITH (concurrency=1, cpu_rate_limit=10, memory_limit=10); -CREATE RESOURCE GROUP rg2_test_group WITH (concurrency=1, cpu_rate_limit=50, memory_limit=50); -DROP RESOURCE GROUP rg1_test_group; -DROP RESOURCE GROUP rg2_test_group; -CREATE RESOURCE GROUP rg1_test_group WITH (concurrency=1, cpu_rate_limit=20, memory_limit=20); -CREATE RESOURCE GROUP rg2_test_group WITH (concurrency=1, cpu_rate_limit=40, 
memory_limit=40); -DROP RESOURCE GROUP rg1_test_group; -DROP RESOURCE GROUP rg2_test_group; -CREATE RESOURCE GROUP rg1_test_group WITH (concurrency=1, cpu_rate_limit=30, memory_limit=30); -CREATE RESOURCE GROUP rg2_test_group WITH (concurrency=1, cpu_rate_limit=30, memory_limit=30); +CREATE RESOURCE GROUP rg1_test_group WITH (concurrency=1, cpu_hard_quota_limit=10); +CREATE RESOURCE GROUP rg2_test_group WITH (concurrency=1, cpu_hard_quota_limit=500); DROP RESOURCE GROUP rg1_test_group; DROP RESOURCE GROUP rg2_test_group; --- positive: concurrency should be zero for cgroup audited resource group -CREATE RESOURCE GROUP rg_test_group WITH (concurrency=0, cpu_rate_limit=10, memory_limit=10, memory_auditor="cgroup"); -DROP RESOURCE GROUP rg_test_group; - --- memory_spill_ratio range is [0, 100] --- no limit on the sum of memory_shared_quota and memory_spill_ratio -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_shared_quota=0, memory_spill_ratio=1); -DROP RESOURCE GROUP rg_test_group; -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_shared_quota=50, memory_spill_ratio=50); -DROP RESOURCE GROUP rg_test_group; -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_shared_quota=0, memory_spill_ratio=100); -DROP RESOURCE GROUP rg_test_group; -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_shared_quota=99, memory_spill_ratio=1); -DROP RESOURCE GROUP rg_test_group; - --- negative: memory_spill_ratio does not accept out of range percentage values -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_spill_ratio=-1); -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_spill_ratio=101); - --- negative: memory_spill_ratio does not accept string values -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_spill_ratio='0'); -CREATE RESOURCE 
GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_spill_ratio='10'); - --- negative: memory_spill_ratio does not accept float values -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_spill_ratio=10.5); - --- negative: when memory_limit is unlimited memory_spill_ratio must be set to 0 -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=0, memory_spill_ratio=10); - --- positive -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=0, memory_spill_ratio=0); -DROP RESOURCE GROUP rg_test_group; -- ---------------------------------------------------------------------- -- Test: alter a resource group -- ---------------------------------------------------------------------- -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=5, memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=5); -- ALTER RESOURCE GROUP SET CONCURRENCY N -- negative: concurrency should be in [1, max_connections] @@ -251,86 +209,30 @@ ALTER RESOURCE GROUP rg_test_group SET CONCURRENCY 1; ALTER RESOURCE GROUP rg_test_group SET CONCURRENCY 2; ALTER RESOURCE GROUP rg_test_group SET CONCURRENCY 25; --- ALTER RESOURCE GROUP SET CPU_RATE_LIMIT VALUE --- negative: cpu_rate_limit & memory_limit should be in [1, 100] -ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT -0.1; -ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT -1; -ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT 0; -ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT 0.7; -ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT 1.7; -ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT 61; -ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT a; -ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT 'abc'; -ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT 20%; -ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT 0.2%; --- positive: cpu_rate_limit & memory_limit should be in [1, 100] -ALTER 
RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT 1; -ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT 2; -ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT 60; -DROP RESOURCE GROUP rg_test_group; --- positive: total cpu_rate_limit & memory_limit should be in [1, 100] -CREATE RESOURCE GROUP rg1_test_group WITH (cpu_rate_limit=10, memory_limit=10); -CREATE RESOURCE GROUP rg2_test_group WITH (cpu_rate_limit=10, memory_limit=10); -ALTER RESOURCE GROUP rg1_test_group SET CPU_RATE_LIMIT 50; -ALTER RESOURCE GROUP rg1_test_group SET CPU_RATE_LIMIT 40; -ALTER RESOURCE GROUP rg2_test_group SET CPU_RATE_LIMIT 20; -ALTER RESOURCE GROUP rg1_test_group SET CPU_RATE_LIMIT 30; -ALTER RESOURCE GROUP rg2_test_group SET CPU_RATE_LIMIT 30; -DROP RESOURCE GROUP rg1_test_group; -DROP RESOURCE GROUP rg2_test_group; --- positive: cpuset and cpu_rate_limit are exclusive, --- if cpu_rate_limit is set, cpuset is empty +-- ALTER RESOURCE GROUP SET cpu_hard_quota_limit VALUE +-- negative: cpu_hard_quota_limit should be in [1, 100] +ALTER RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit -0.1; +ALTER RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit -1; +ALTER RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit 0; +ALTER RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit 0.7; +ALTER RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit 1.7; +ALTER RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit 61; +ALTER RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit a; +ALTER RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit 'abc'; +ALTER RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit 20%; +ALTER RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit 0.2%; +-- positive: cpu_hard_quota_limit should be in [1, 100] +ALTER RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit 1; +ALTER RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit 2; +ALTER RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit 60; +DROP RESOURCE GROUP rg_test_group; + +-- positive: cpuset and 
cpu_hard_quota_limit are exclusive, +-- if cpu_hard_quota_limit is set, cpuset is empty -- if cpuset is set, cpuset is -1 -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=10); ALTER RESOURCE GROUP rg_test_group SET CPUSET '0'; -SELECT groupname,cpu_rate_limit,memory_limit,cpuset FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; -ALTER RESOURCE GROUP rg_test_group SET CPU_RATE_LIMIT 10; -SELECT groupname,cpu_rate_limit,memory_limit,cpuset FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; -DROP RESOURCE GROUP rg_test_group; - -CREATE RESOURCE GROUP cgroup_audited_group WITH (concurrency=0, cpu_rate_limit=10, memory_limit=10, memory_auditor="cgroup"); --- negative: memory_auditor cannot be altered -ALTER RESOURCE GROUP cgroup_audited_group SET MEMORY_AUDITOR "default"; --- negative: concurrency should be zero for cgroup audited resource group -ALTER RESOURCE GROUP cgroup_audited_group SET CONCURRENCY 10; --- negative: role should not be assigned to a cgroup audited resource group -CREATE ROLE cgroup_audited_role RESOURCE GROUP cgroup_audited_group; -DROP RESOURCE GROUP cgroup_audited_group; - --- positive: memory_spill_ratio accepts integer values -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_spill_ratio=20); -ALTER RESOURCE GROUP rg_test_group SET memory_spill_ratio 10; -DROP RESOURCE GROUP rg_test_group; - --- negative: memory_spill_ratio only accepts integer values -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_spill_ratio=20); -ALTER RESOURCE GROUP rg_test_group SET memory_spill_ratio '10'; -ALTER RESOURCE GROUP rg_test_group SET memory_spill_ratio 10.5; -DROP RESOURCE GROUP rg_test_group; - --- negative: memory_spill_ratio does not accept out of range values -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, 
memory_spill_ratio=20); -ALTER RESOURCE GROUP rg_test_group SET memory_spill_ratio -1; -ALTER RESOURCE GROUP rg_test_group SET memory_spill_ratio 101; -DROP RESOURCE GROUP rg_test_group; - --- positive: memory_limit can be altered to unlimited if memory_spill_ratio is 0 -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_spill_ratio=0); -ALTER RESOURCE GROUP rg_test_group SET memory_limit 0; -DROP RESOURCE GROUP rg_test_group; - --- negative: memory_spill_ratio can only be set to 0 if memory_limit is unlimited -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=0, memory_spill_ratio=0); -ALTER RESOURCE GROUP rg_test_group SET memory_spill_ratio 10; -DROP RESOURCE GROUP rg_test_group; - --- positive: memory_spill_ratio accepts a percentage value only if --- memory_limit is limited -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_spill_ratio=0); -ALTER RESOURCE GROUP rg_test_group SET memory_spill_ratio 10; -DROP RESOURCE GROUP rg_test_group; - --- negative: memory_limit must be limited if memory_spill_ratio > 0 -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=10, memory_limit=10, memory_spill_ratio=10); -ALTER RESOURCE GROUP rg_test_group SET memory_limit 0; +SELECT groupname,cpu_hard_quota_limit,cpuset FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; +ALTER RESOURCE GROUP rg_test_group SET cpu_hard_quota_limit 10; +SELECT groupname,cpu_hard_quota_limit,cpuset FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; DROP RESOURCE GROUP rg_test_group; diff --git a/src/test/isolation2/sql/resgroup/resgroup_transaction.sql b/src/test/isolation2/sql/resgroup/resgroup_transaction.sql index d805ca3bc79..023cdf9c882 100644 --- a/src/test/isolation2/sql/resgroup/resgroup_transaction.sql +++ b/src/test/isolation2/sql/resgroup/resgroup_transaction.sql @@ -8,7 +8,7 @@ DROP RESOURCE GROUP rg_test_group; -- helper view to check the resgroup status 
CREATE OR REPLACE VIEW rg_test_monitor AS - SELECT groupname, concurrency, cpu_rate_limit + SELECT groupname, concurrency, cpu_hard_quota_limit FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_test_group'; @@ -18,12 +18,12 @@ CREATE OR REPLACE VIEW rg_test_monitor AS -- CREATE RESOURCE GROUP cannot run inside a transaction block BEGIN; - CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=5, memory_limit=5); + CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=5); END; SELECT * FROM rg_test_monitor; -- ALTER RESOURCE GROUP cannot run inside a transaction block -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=5, memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=5); BEGIN; ALTER RESOURCE GROUP rg_test_group SET CONCURRENCY 10; END; @@ -45,12 +45,12 @@ DROP RESOURCE GROUP rg_test_group; -- CREATE RESOURCE GROUP cannot run inside a transaction block BEGIN; SELECT 1; - CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=5, memory_limit=5); + CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=5); END; SELECT * FROM rg_test_monitor; -- ALTER RESOURCE GROUP cannot run inside a transaction block -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=5, memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=5); BEGIN; SELECT 1; ALTER RESOURCE GROUP rg_test_group SET CONCURRENCY 10; @@ -74,13 +74,13 @@ DROP RESOURCE GROUP rg_test_group; -- CREATE RESOURCE GROUP cannot run inside a subtransaction BEGIN; SAVEPOINT rg_savepoint; - CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=5, memory_limit=5); + CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=5); ROLLBACK TO SAVEPOINT rg_savepoint; ABORT; SELECT * FROM rg_test_monitor; -- ALTER RESOURCE GROUP cannot run inside a subtransaction -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=5, memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=5); BEGIN; SAVEPOINT 
rg_savepoint; ALTER RESOURCE GROUP rg_test_group SET CONCURRENCY 10; @@ -103,7 +103,7 @@ DROP RESOURCE GROUP rg_test_group; -- ---------------------------------------------------------------------- CREATE OR REPLACE FUNCTION rg_create_func() RETURNS VOID -AS $$ CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=5, memory_limit=5) $$ +AS $$ CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=5) $$ LANGUAGE SQL; CREATE OR REPLACE FUNCTION rg_alter_func() RETURNS VOID @@ -119,7 +119,7 @@ SELECT * FROM rg_create_func(); SELECT * FROM rg_test_monitor; -- ALTER RESOURCE GROUP cannot run inside a function call -CREATE RESOURCE GROUP rg_test_group WITH (cpu_rate_limit=5, memory_limit=5); +CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=5); SELECT * FROM rg_alter_func(); SELECT * FROM rg_test_monitor; diff --git a/src/test/isolation2/sql/resgroup/resgroup_unassign_entrydb.sql b/src/test/isolation2/sql/resgroup/resgroup_unassign_entrydb.sql index 69516f7469d..b14836b1312 100644 --- a/src/test/isolation2/sql/resgroup/resgroup_unassign_entrydb.sql +++ b/src/test/isolation2/sql/resgroup/resgroup_unassign_entrydb.sql @@ -8,7 +8,7 @@ DROP ROLE IF EXISTS role_test; DROP RESOURCE GROUP rg_test; -- end_ignore -CREATE RESOURCE GROUP rg_test WITH (concurrency=2, cpu_rate_limit=10, memory_limit=10); +CREATE RESOURCE GROUP rg_test WITH (concurrency=2, cpu_hard_quota_limit=10); CREATE ROLE role_test RESOURCE GROUP rg_test; -- By pass this session, else this affects the testing session, i.e. 
1: diff --git a/src/test/isolation2/sql/resgroup/resgroup_unlimit_memory_spill_ratio.sql b/src/test/isolation2/sql/resgroup/resgroup_unlimit_memory_spill_ratio.sql deleted file mode 100644 index 683bfc0680f..00000000000 --- a/src/test/isolation2/sql/resgroup/resgroup_unlimit_memory_spill_ratio.sql +++ /dev/null @@ -1,85 +0,0 @@ --- start_ignore -DROP RESOURCE GROUP rg_spill_test; --- end_ignore - --- create -CREATE RESOURCE GROUP rg_spill_test WITH -(concurrency=10, cpu_rate_limit=20, memory_limit=20, memory_shared_quota=50, memory_spill_ratio=60); -DROP RESOURCE GROUP rg_spill_test; - -CREATE RESOURCE GROUP rg_spill_test WITH -(concurrency=10, cpu_rate_limit=20, memory_limit=20, memory_shared_quota=50, memory_spill_ratio=0); -DROP RESOURCE GROUP rg_spill_test; - -CREATE RESOURCE GROUP rg_spill_test WITH -(concurrency=10, cpu_rate_limit=20, memory_limit=20, memory_shared_quota=50, memory_spill_ratio=100); -DROP RESOURCE GROUP rg_spill_test; - -CREATE RESOURCE GROUP rg_spill_test WITH -(concurrency=10, cpu_rate_limit=20, memory_limit=20, memory_shared_quota=50, memory_spill_ratio=-1); -DROP RESOURCE GROUP rg_spill_test; - -CREATE RESOURCE GROUP rg_spill_test WITH -(concurrency=10, cpu_rate_limit=20, memory_limit=20, memory_shared_quota=50, memory_spill_ratio=101); -DROP RESOURCE GROUP rg_spill_test; - --- alter -CREATE RESOURCE GROUP rg_spill_test WITH -(concurrency=10, cpu_rate_limit=20, memory_limit=20, memory_shared_quota=50, memory_spill_ratio=20); - -ALTER RESOURCE GROUP rg_spill_test SET MEMORY_SPILL_RATIO 60; -ALTER RESOURCE GROUP rg_spill_test SET MEMORY_SPILL_RATIO 0; -ALTER RESOURCE GROUP rg_spill_test SET MEMORY_SPILL_RATIO 100; -ALTER RESOURCE GROUP rg_spill_test SET MEMORY_SPILL_RATIO -1; -ALTER RESOURCE GROUP rg_spill_test SET MEMORY_SPILL_RATIO 101; - -DROP RESOURCE GROUP rg_spill_test; - --- set GUC -CREATE RESOURCE GROUP rg_spill_test WITH -(concurrency=10, cpu_rate_limit=20, memory_limit=20, memory_shared_quota=50, memory_spill_ratio=20); - -SET 
MEMORY_SPILL_RATIO TO 60; -SHOW MEMORY_SPILL_RATIO; -SELECT 1; - -SET MEMORY_SPILL_RATIO TO 0; -SHOW MEMORY_SPILL_RATIO; -SELECT 1; - -SET MEMORY_SPILL_RATIO TO 100; -SHOW MEMORY_SPILL_RATIO; -SELECT 1; - -SET MEMORY_SPILL_RATIO TO -1; -SHOW MEMORY_SPILL_RATIO; -SELECT 1; - -SET MEMORY_SPILL_RATIO TO 101; -SHOW MEMORY_SPILL_RATIO; -SELECT 1; - -DROP RESOURCE GROUP rg_spill_test; - --- test case for query_mem=0 -CREATE TABLE test_zero_workmem(c int); - ---This test intends to build a situation that query_mem = 0 ---and verify under such condition work_mem will be used. -CREATE RESOURCE GROUP rg_zero_workmem WITH -(concurrency=2, cpu_rate_limit=10, memory_limit=20, memory_shared_quota=20, memory_spill_ratio=0); - -CREATE ROLE role_zero_workmem SUPERUSER RESOURCE GROUP rg_zero_workmem; -SET ROLE TO role_zero_workmem; - ---test query that will use spi -ANALYZE test_zero_workmem; - ---test normal DML -SELECT count(*) FROM test_zero_workmem; - ---clean env -RESET ROLE; -DROP TABLE test_zero_workmem; -DROP ROLE role_zero_workmem; -DROP RESOURCE GROUP rg_zero_workmem; diff --git a/src/test/isolation2/sql/resgroup/resgroup_views.sql b/src/test/isolation2/sql/resgroup/resgroup_views.sql index 2f751f41d44..acddfe3e68d 100644 --- a/src/test/isolation2/sql/resgroup/resgroup_views.sql +++ b/src/test/isolation2/sql/resgroup/resgroup_views.sql @@ -9,16 +9,12 @@ select rsgname , num_queued , num_executed , cpu_usage->'-1' as qd_cpu_usage - , memory_usage->'-1'->'used' as qd_memory_used - , memory_usage->'-1'->'shared_used' as qd_memory_shared_used from gp_toolkit.gp_resgroup_status where rsgname='default_group'; select rsgname , groupid , cpu - , memory_used - , memory_shared_used from gp_toolkit.gp_resgroup_status_per_host s join gp_segment_configuration c on s.hostname=c.hostname and c.content=-1 and role='p' @@ -28,8 +24,6 @@ select rsgname , groupid , segment_id , cpu - , memory_used - , memory_shared_used from gp_toolkit.gp_resgroup_status_per_segment where 
rsgname='default_group' and segment_id=-1; diff --git a/src/test/isolation2/sql/resgroup/restore_default_resgroup.sql b/src/test/isolation2/sql/resgroup/restore_default_resgroup.sql index de498aefffc..312878d155e 100644 --- a/src/test/isolation2/sql/resgroup/restore_default_resgroup.sql +++ b/src/test/isolation2/sql/resgroup/restore_default_resgroup.sql @@ -1,7 +1,6 @@ -- enable resource group and restart cluster. -- start_ignore ! gpconfig -c gp_resource_group_cpu_limit -v 0.9; -! gpconfig -c gp_resource_group_memory_limit -v 0.9; ! gpconfig -c gp_resource_manager -v group; -- 40 should be enough for the following cases and some @@ -12,7 +11,6 @@ show gp_resource_manager; show gp_resource_group_cpu_limit; -show gp_resource_group_memory_limit; show max_connections; -- by default admin_group has concurrency set to -1 which leads to diff --git a/src/test/regress/expected/resource_group.out b/src/test/regress/expected/resource_group.out index bea47ab4390..3b36e4e5e19 100644 --- a/src/test/regress/expected/resource_group.out +++ b/src/test/regress/expected/resource_group.out @@ -21,13 +21,13 @@ ERROR: resource group "rg_dump_test2" does not exist DROP RESOURCE GROUP rg_dump_test3; ERROR: resource group "rg_dump_test3" does not exist -- end_ignore -CREATE RESOURCE GROUP rg_dump_test1 WITH (concurrency=2, cpu_rate_limit=5, memory_limit=5); +CREATE RESOURCE GROUP rg_dump_test1 WITH (concurrency=2, cpu_hard_quota_limit=5); WARNING: resource group is disabled HINT: To enable set gp_resource_manager=group -CREATE RESOURCE GROUP rg_dump_test2 WITH (concurrency=2, cpu_rate_limit=5, memory_limit=5); +CREATE RESOURCE GROUP rg_dump_test2 WITH (concurrency=2, cpu_hard_quota_limit=5); WARNING: resource group is disabled HINT: To enable set gp_resource_manager=group -CREATE RESOURCE GROUP rg_dump_test3 WITH (concurrency=2, cpu_rate_limit=5, memory_limit=5); +CREATE RESOURCE GROUP rg_dump_test3 WITH (concurrency=2, cpu_hard_quota_limit=5); WARNING: resource group is disabled HINT: To 
enable set gp_resource_manager=group CREATE ROLE role_dump_test1 RESOURCE GROUP rg_dump_test1; diff --git a/src/test/regress/expected/resource_group_cpuset.out b/src/test/regress/expected/resource_group_cpuset.out index ac25a6209fa..45167881eeb 100644 --- a/src/test/regress/expected/resource_group_cpuset.out +++ b/src/test/regress/expected/resource_group_cpuset.out @@ -1,9 +1,13 @@ -- -- Test: cpuset cannot be specified when group is disabled. -- -CREATE RESOURCE GROUP resource_group1 WITH (memory_limit=5, cpuset='0'); +-- start_ignore +DROP RESOURCE GROUP resource_group1; +ERROR: resource group "resource_group1" does not exist +-- end_ignore +CREATE RESOURCE GROUP resource_group1 WITH (cpuset='0'); ERROR: resource group must be enabled to use cpuset feature -CREATE RESOURCE GROUP resource_group1 WITH (memory_limit=5, cpu_rate_limit=5); +CREATE RESOURCE GROUP resource_group1 WITH (cpu_hard_quota_limit=5); WARNING: resource group is disabled HINT: To enable set gp_resource_manager=group ALTER RESOURCE GROUP resource_group1 SET cpuset '0'; diff --git a/src/test/regress/expected/resource_group_gucs.out b/src/test/regress/expected/resource_group_gucs.out index 1c40665f1bf..3bcfe3a2936 100644 --- a/src/test/regress/expected/resource_group_gucs.out +++ b/src/test/regress/expected/resource_group_gucs.out @@ -2,33 +2,6 @@ -- in case any of them are removed by accident. -- do not care about the values / ranges / types -- start_ignore -\! gpconfig -s gp_resgroup_print_operator_memory_limits; -Values on all segments are consistent -GUC : gp_resgroup_print_operator_memory_limits -Coordinator value: off -Segment value: off --- end_ignore -\! echo $?; -0 --- start_ignore -\! gpconfig -s gp_resgroup_memory_policy_auto_fixed_mem; -Values on all segments are consistent -GUC : gp_resgroup_memory_policy_auto_fixed_mem -Coordinator value: 100kB -Segment value: 100kB --- end_ignore -\! echo $?; -0 --- start_ignore -\! 
gpconfig -s gp_resgroup_memory_policy; -Values on all segments are consistent -GUC : gp_resgroup_memory_policy -Coordinator value: eager_free -Segment value: eager_free --- end_ignore -\! echo $?; -0 --- start_ignore \! gpconfig -s gp_resource_group_cpu_priority; Values on all segments are consistent GUC : gp_resource_group_cpu_priority @@ -46,12 +19,3 @@ Segment value: 0.9 -- end_ignore \! echo $?; 0 --- start_ignore -\! gpconfig -s gp_resource_group_memory_limit; -Values on all segments are consistent -GUC : gp_resource_group_memory_limit -Coordinator value: 0.6924 -Segment value: 0.6924 --- end_ignore -\! echo $?; -0 diff --git a/src/test/regress/regress_gp.c b/src/test/regress/regress_gp.c index 0f4518ab7dd..0441d02eb52 100644 --- a/src/test/regress/regress_gp.c +++ b/src/test/regress/regress_gp.c @@ -79,7 +79,6 @@ extern Datum userdata_project(PG_FUNCTION_ARGS); /* Resource queue/group support */ extern Datum checkResourceQueueMemoryLimits(PG_FUNCTION_ARGS); extern Datum repeatPalloc(PG_FUNCTION_ARGS); -extern Datum resGroupPalloc(PG_FUNCTION_ARGS); /* Gang management test support */ extern Datum gangRaiseInfo(PG_FUNCTION_ARGS); @@ -621,31 +620,6 @@ repeatPalloc(PG_FUNCTION_ARGS) PG_RETURN_INT32(0); } -PG_FUNCTION_INFO_V1(resGroupPalloc); -Datum -resGroupPalloc(PG_FUNCTION_ARGS) -{ - float ratio = PG_GETARG_FLOAT8(0); - int memLimit, slotQuota, sharedQuota; - int size; - int count; - int i; - - if (!IsResGroupEnabled()) - PG_RETURN_INT32(0); - - ResGroupGetMemInfo(&memLimit, &slotQuota, &sharedQuota); - size = ceilf(memLimit * ratio); - count = size / 512; - for (i = 0; i < count; i++) - MemoryContextAlloc(TopMemoryContext, 512 * 1024 * 1024); - - size %= 512; - MemoryContextAlloc(TopMemoryContext, size * 1024 * 1024); - - PG_RETURN_INT32(0); -} - /* * This is do-nothing table function that passes the input relation * to the output relation without any modification. 
diff --git a/src/test/regress/sql/resource_group.sql b/src/test/regress/sql/resource_group.sql index 3907211a3de..28785bda09f 100644 --- a/src/test/regress/sql/resource_group.sql +++ b/src/test/regress/sql/resource_group.sql @@ -18,9 +18,9 @@ DROP RESOURCE GROUP rg_dump_test2; DROP RESOURCE GROUP rg_dump_test3; -- end_ignore -CREATE RESOURCE GROUP rg_dump_test1 WITH (concurrency=2, cpu_rate_limit=5, memory_limit=5); -CREATE RESOURCE GROUP rg_dump_test2 WITH (concurrency=2, cpu_rate_limit=5, memory_limit=5); -CREATE RESOURCE GROUP rg_dump_test3 WITH (concurrency=2, cpu_rate_limit=5, memory_limit=5); +CREATE RESOURCE GROUP rg_dump_test1 WITH (concurrency=2, cpu_hard_quota_limit=5); +CREATE RESOURCE GROUP rg_dump_test2 WITH (concurrency=2, cpu_hard_quota_limit=5); +CREATE RESOURCE GROUP rg_dump_test3 WITH (concurrency=2, cpu_hard_quota_limit=5); CREATE ROLE role_dump_test1 RESOURCE GROUP rg_dump_test1; CREATE ROLE role_dump_test2 RESOURCE GROUP rg_dump_test2; diff --git a/src/test/regress/sql/resource_group_cpuset.sql b/src/test/regress/sql/resource_group_cpuset.sql index 57e832c7bf0..95c1dd59dd0 100644 --- a/src/test/regress/sql/resource_group_cpuset.sql +++ b/src/test/regress/sql/resource_group_cpuset.sql @@ -6,8 +6,8 @@ DROP RESOURCE GROUP resource_group1; -- end_ignore -CREATE RESOURCE GROUP resource_group1 WITH (memory_limit=5, cpuset='0'); -CREATE RESOURCE GROUP resource_group1 WITH (memory_limit=5, cpu_rate_limit=5); +CREATE RESOURCE GROUP resource_group1 WITH (cpuset='0'); +CREATE RESOURCE GROUP resource_group1 WITH (cpu_hard_quota_limit=5); ALTER RESOURCE GROUP resource_group1 SET cpuset '0'; DROP RESOURCE GROUP resource_group1; diff --git a/src/test/regress/sql/resource_group_gucs.sql b/src/test/regress/sql/resource_group_gucs.sql index 8f939b29c66..e5b70b2fcfa 100644 --- a/src/test/regress/sql/resource_group_gucs.sql +++ b/src/test/regress/sql/resource_group_gucs.sql @@ -2,21 +2,6 @@ -- in case any of them are removed by accident. 
-- do not care about the values / ranges / types --- start_ignore -\! gpconfig -s gp_resgroup_print_operator_memory_limits; --- end_ignore -\! echo $?; - --- start_ignore -\! gpconfig -s gp_resgroup_memory_policy_auto_fixed_mem; --- end_ignore -\! echo $?; - --- start_ignore -\! gpconfig -s gp_resgroup_memory_policy; --- end_ignore -\! echo $?; - -- start_ignore \! gpconfig -s gp_resource_group_cpu_priority; -- end_ignore @@ -26,8 +11,3 @@ \! gpconfig -s gp_resource_group_cpu_limit; -- end_ignore \! echo $?; - --- start_ignore -\! gpconfig -s gp_resource_group_memory_limit; --- end_ignore -\! echo $?; From 492a9d8e8ba4288389a76e5db478e22361c23975 Mon Sep 17 00:00:00 2001 From: Huansong Fu Date: Wed, 14 Dec 2022 08:54:14 -0800 Subject: [PATCH 38/46] Initialize DLOG's oldestXmin up to the latestCompletedXid+1 In DistributedLog_AdvanceOldestXmin() we advance DLOG's idea of the oldestXmin to the "globalxmin" value, and also truncate all DLOG segments that only hold xids older than the oldestXmin. The oldestXmin can be xmax, i.e. the "latestCompletedXid" + 1, when e.g. there's no other concurrent running transactions. However, during postmaster restart we initialize the oldestXmin to be up to only latestCompletedXid. As a result, when we try to advance it again, we could try to access the segment that holds latestCompletedXid, which had been truncated before the restart. Fixing it now by initializing oldestXmin properly. Add a test for the same. Had to move the test file to isolation/input in order to import the regress.so for the test_consume_xids() function we need. 
--- src/backend/access/transam/distributedlog.c | 18 ++++- .../distributed_snapshot.source} | 54 +++++++++++++ .../distributed_snapshot.source} | 81 +++++++++++++++++++ 3 files changed, 152 insertions(+), 1 deletion(-) rename src/test/isolation2/{sql/distributed_snapshot.sql => input/distributed_snapshot.source} (81%) rename src/test/isolation2/{expected/distributed_snapshot.out => output/distributed_snapshot.source} (83%) diff --git a/src/backend/access/transam/distributedlog.c b/src/backend/access/transam/distributedlog.c index b59ca01f124..1f374ecd729 100644 --- a/src/backend/access/transam/distributedlog.c +++ b/src/backend/access/transam/distributedlog.c @@ -136,10 +136,26 @@ DistributedLog_InitOldestXmin(void) /* Advance to the first XID on the next page */ xid = AdvanceTransactionIdToNextPage(oldestXmin); - /* but don't go past oldestLocalXmin */ + /* + * But don't go past oldestLocalXmin + 1 which is the most + * we might've set the oldestXmin before restart (essentially + * it's same as the 'xmax' value in GetSnapshotData()). + * + * Note that, stopping here means that we don't have a page + * for oldestXmin, but we are fine because: + * + * (1) if we later assigned a new xid, that new xid is going + * to be the same as oldestXmin here. And we are guaranteed + * to have created DLOG segment for this new xid. + * (2) before we assigned any new xid, we don't really need to + * access DLOG for oldestXmin. Even if we call + * DistributedLog_AdvanceOldestXmin(), since we don't have + * any newer xid to advance to, the call would be a no-op. 
+ */ if (TransactionIdFollows(xid, latestXid)) { oldestXmin = latestXid; + TransactionIdAdvance(oldestXmin); break; } diff --git a/src/test/isolation2/sql/distributed_snapshot.sql b/src/test/isolation2/input/distributed_snapshot.source similarity index 81% rename from src/test/isolation2/sql/distributed_snapshot.sql rename to src/test/isolation2/input/distributed_snapshot.source index d30b05afec1..f15fecd39a8 100644 --- a/src/test/isolation2/sql/distributed_snapshot.sql +++ b/src/test/isolation2/input/distributed_snapshot.source @@ -1,5 +1,9 @@ -- Distributed snapshot tests +create or replace function test_consume_xids(int4) returns void +as '@abs_srcdir@/../regress/regress.so', 'test_consume_xids' +language C; + -- Scenario1: Test to validate GetSnapshotData()'s computation of globalXmin using -- distributed snapshot. It mainly uses a old read-only transaction to help -- create situation where globalXmin can be lower than distributed oldestXmin @@ -296,3 +300,53 @@ insert into t_alter_snapshot_test values (1, 1); select * from t_alter_snapshot_test; drop table t_alter_snapshot_test; + +---------------------------------------- +-- Test for fixes +---------------------------------------- +-- Case 1. Test that when we advanced DLOG's oldestXmin to the +-- latestCompletedXid + 1, and that it is the first xid of the +-- next segment, we would truncate all DLOG segments (all txs +-- have completed and no longer needed). And in that case, we +-- should still be able to advance properly after restart. +create table distributed_snapshot_fix1(a int); + +-- On a primary, burn xids until the next xid is the first one of a segment, +-- which has 4096 (ENTRIES_PER_PAGE) * 32 (SLRU_PAGES_PER_SEGMENT) = 131072 xids. +-- Details about how we consume it: +-- 1. Using test_consume_xids to consume what's needed - 2; +-- 2. The current transaction consumes 1 xid; +-- 3. Use another transaction to consume 1 more. 
This is to mark the last +-- one completed so that after restart we can start from that. +1U: begin; +1U: select test_consume_xids((131070 - (cur % 131072))::int) from txid_current() cur; +1U: end; +1U: insert into distributed_snapshot_fix1 values(1); +1Uq: +1q: + +-- Restart server, so that DistributedLogCtl->shared->latest_page_number is +-- initialized to be the one that the next xid is on. When that happens, and +-- when we do DistributedLog_AdvanceOldestXmin() again in the next query, we +-- would successfully truncate the current working segment. +select pg_ctl(datadir, 'restart') from gp_segment_configuration where role = 'p' and content = 1; + +-- Do a SELECT. This assigns distributed snapshot but it won't assign new xid. +-- Since we'll advance to the next future xid which is the first xid of the next segment, +-- this will get all DLOG segments truncated. +1: select * from distributed_snapshot_fix1; + +-- Checking the DLOG segments we have right now, which is none. +1U: select count(*) from gp_distributed_log; + +1Uq: +1q: + +-- Restart server again. Previously DistributedLogShared->oldestXmin is initialized to +-- latestCompletedXid. +select pg_ctl(datadir, 'restart') from gp_segment_configuration where role = 'p' and content = 1; + +-- Do a SELECT. Previously this would complain about missing segment file because we've +-- truncated the segment that latestCompletedXid is on. Now we don't, because we will +-- be advancing from latestCompletedXid + 1. 
+1: select * from distributed_snapshot_fix1; diff --git a/src/test/isolation2/expected/distributed_snapshot.out b/src/test/isolation2/output/distributed_snapshot.source similarity index 83% rename from src/test/isolation2/expected/distributed_snapshot.out rename to src/test/isolation2/output/distributed_snapshot.source index 882d4ffd307..9a03cc04099 100644 --- a/src/test/isolation2/expected/distributed_snapshot.out +++ b/src/test/isolation2/output/distributed_snapshot.source @@ -1,5 +1,8 @@ -- Distributed snapshot tests +create or replace function test_consume_xids(int4) returns void as '@abs_srcdir@/../regress/regress.so', 'test_consume_xids' language C; +CREATE + -- Scenario1: Test to validate GetSnapshotData()'s computation of globalXmin using -- distributed snapshot. It mainly uses a old read-only transaction to help -- create situation where globalXmin can be lower than distributed oldestXmin @@ -529,3 +532,81 @@ select * from t_alter_snapshot_test; (2 rows) drop table t_alter_snapshot_test; DROP + +---------------------------------------- +-- Test for fixes +---------------------------------------- +-- Case 1. Test that when we advanced DLOG's oldestXmin to the +-- latestCompletedXid + 1, and that it is the first xid of the +-- next segment, we would truncate all DLOG segments (all txs +-- have completed and no longer needed). And in that case, we +-- should still be able to advance properly after restart. +create table distributed_snapshot_fix1(a int); +CREATE + +-- On a primary, burn xids until the next xid is the first one of a segment, +-- which has 4096 (ENTRIES_PER_PAGE) * 32 (SLRU_PAGES_PER_SEGMENT) = 131072 xids. +-- Details about how we consume it: +-- 1. Using test_consume_xids to consume what's needed - 2; +-- 2. The current transaction consumes 1 xid; +-- 3. Use another transaction to consume 1 more. This is to mark the last +-- one completed so that after restart we can start from that. 
+1U: begin; +BEGIN +1U: select test_consume_xids((131070 - (cur % 131072))::int) from txid_current() cur; + test_consume_xids +------------------- + +(1 row) +1U: end; +END +1U: insert into distributed_snapshot_fix1 values(1); +INSERT 1 +1Uq: ... +1q: ... + +-- Restart server, so that DistributedLogCtl->shared->latest_page_number is +-- initialized to be the one that the next xid is on. When that happens, and +-- when we do DistributedLog_AdvanceOldestXmin() again in the next query, we +-- would successfully truncate the current working segment. +select pg_ctl(datadir, 'restart') from gp_segment_configuration where role = 'p' and content = 1; + pg_ctl +-------- + OK +(1 row) + +-- Do a SELECT. This assigns distributed snapshot but it won't assign new xid. +-- Since we'll advance to the next future xid which is the first xid of the next segment, +-- this will get all DLOG segments truncated. +1: select * from distributed_snapshot_fix1; + a +--- + 1 +(1 row) + +-- Checking the DLOG segments we have right now, which is none. +1U: select count(*) from gp_distributed_log; + count +------- + 0 +(1 row) + +1Uq: ... +1q: ... + +-- Restart server again. Previously DistributedLogShared->oldestXmin is initialized to +-- latestCompletedXid. +select pg_ctl(datadir, 'restart') from gp_segment_configuration where role = 'p' and content = 1; + pg_ctl +-------- + OK +(1 row) + +-- Do a SELECT. Previously this would complain about missing segment file because we've +-- truncated the segment that latestCompletedXid is on. Now we don't, because we will +-- be advancing from latestCompletedXid + 1. 
+1: select * from distributed_snapshot_fix1; + a +--- + 1 +(1 row) From f27c52b3da9996488a91c3f802a38a216f178313 Mon Sep 17 00:00:00 2001 From: Zhenglong Li Date: Fri, 6 Jan 2023 13:49:18 +0800 Subject: [PATCH 39/46] clean up useless test cases and GUCs (#14712) Since #14562 removed some GUCs and the memory model of resource group, there have some legacy test cases and useless codes in the project, this PR will remove those codes and files. No more test needs, it's a clean process. --- gpMgmt/bin/gpcheckresgroupimpl | 17 +- .../unit/test_unit_gpcheckresgroupimpl.py | 85 +- .../test/behave/mgmt_utils/gpconfig.feature | 3 - src/backend/utils/resgroup/cgroup-ops-dummy.c | 7 - src/backend/utils/resgroup/resgroup.c | 9 +- src/include/utils/sync_guc_name.h | 1 - src/include/utils/unsync_guc_name.h | 1 - .../resgroup/resgroup_alter_concurrency.out | 330 +----- .../resgroup/resgroup_concurrency.out | 2 +- .../resgroup/resgroup_operator_memory.out | 286 ------ .../resgroup/resgroup_parallel_queries.out | 598 ----------- .../expected/resgroup/resgroup_query_mem.out | 50 - .../resgroup/restore_default_resgroup.out | 70 -- .../isolation2/isolation2_resgroup_schedule | 6 +- .../resgroup/resgroup_alter_memory.source | 942 ------------------ .../resgroup/resgroup_memory_limit.source | 923 ----------------- .../resgroup/resgroup_memory_runaway.source | 303 ------ .../resgroup/resgroup_memory_statistic.source | 302 ------ .../resgroup/resgroup_alter_concurrency.sql | 4 +- .../resgroup/resgroup_parallel_queries.sql | 269 ----- .../sql/resgroup/restore_default_resgroup.sql | 18 - 21 files changed, 24 insertions(+), 4202 deletions(-) delete mode 100644 src/test/isolation2/expected/resgroup/resgroup_operator_memory.out delete mode 100644 src/test/isolation2/expected/resgroup/resgroup_parallel_queries.out delete mode 100644 src/test/isolation2/expected/resgroup/resgroup_query_mem.out delete mode 100644 src/test/isolation2/expected/resgroup/restore_default_resgroup.out delete mode 100644 
src/test/isolation2/output/resgroup/resgroup_alter_memory.source delete mode 100644 src/test/isolation2/output/resgroup/resgroup_memory_limit.source delete mode 100644 src/test/isolation2/output/resgroup/resgroup_memory_runaway.source delete mode 100644 src/test/isolation2/output/resgroup/resgroup_memory_statistic.source delete mode 100644 src/test/isolation2/sql/resgroup/resgroup_parallel_queries.sql delete mode 100644 src/test/isolation2/sql/resgroup/restore_default_resgroup.sql diff --git a/gpMgmt/bin/gpcheckresgroupimpl b/gpMgmt/bin/gpcheckresgroupimpl index d45acb6e69f..73725b606ba 100755 --- a/gpMgmt/bin/gpcheckresgroupimpl +++ b/gpMgmt/bin/gpcheckresgroupimpl @@ -44,12 +44,6 @@ class cgroup(object): if not self.compdirs: self.die("failed to detect cgroup component dirs.") - # 'memory' on 5X is special, although dir 'memory/gpdb' is optional, - # dir 'memory' is mandatory to provide 'memory.limit_in_bytes'. - # in such a case we must always put 'memory' in compdirs on 5X. - if gpver.version < [6, 0, 0] and 'memory' not in self.compdirs: - self.compdirs['memory'] = '' - self.validate_permission("cpu", "gpdb/", "rwx") self.validate_permission("cpu", "gpdb/cgroup.procs", "rw") self.validate_permission("cpu", "gpdb/cpu.cfs_period_us", "rw") @@ -61,16 +55,7 @@ class cgroup(object): self.validate_permission("cpuacct", "gpdb/cpuacct.usage", "r") self.validate_permission("cpuacct", "gpdb/cpuacct.stat", "r") - self.validate_permission("memory", "memory.limit_in_bytes", "r") - - # resgroup memory auditor is introduced in 6.0 devel and backported - # to 5.x branch since 5.6.1. To provide backward compatibilities - # memory permissions are only checked since 6.0. 
if gpver.version >= [6, 0, 0]: - self.validate_permission("memory", "gpdb/", "rwx") - self.validate_permission("memory", "gpdb/memory.limit_in_bytes", "rw") - self.validate_permission("memory", "gpdb/memory.usage_in_bytes", "r") - self.validate_permission("cpuset", "gpdb/", "rwx") self.validate_permission("cpuset", "gpdb/cgroup.procs", "rw") self.validate_permission("cpuset", "gpdb/cpuset.cpus", "rw") @@ -178,7 +163,7 @@ class cgroup(object): def required_comps(self): comps = ['cpu', 'cpuacct'] if gpver.version >= [6, 0, 0]: - comps.extend(['cpuset', 'memory']) + comps.extend(['cpuset']) return comps def fallback_comp_dirs(self): diff --git a/gpMgmt/bin/gppylib/test/unit/test_unit_gpcheckresgroupimpl.py b/gpMgmt/bin/gppylib/test/unit/test_unit_gpcheckresgroupimpl.py index 4e6e0dcb366..31e5f270ea1 100644 --- a/gpMgmt/bin/gppylib/test/unit/test_unit_gpcheckresgroupimpl.py +++ b/gpMgmt/bin/gppylib/test/unit/test_unit_gpcheckresgroupimpl.py @@ -28,7 +28,6 @@ def setUp(self): os.mkdir(os.path.join(self.cgroup_mntpnt, "cpu"), 0o755) os.mkdir(os.path.join(self.cgroup_mntpnt, "cpuacct"), 0o755) - os.mkdir(os.path.join(self.cgroup_mntpnt, "memory"), 0o755) os.mkdir(os.path.join(self.cgroup_mntpnt, "cpuset"), 0o755) self.cgroup = gpcheckresgroupimpl.cgroup() @@ -49,13 +48,6 @@ def setUp(self): self.touch(os.path.join(self.cgroup_mntpnt, "cpuacct", "gpdb", "cpuacct.usage"), 0o400) self.touch(os.path.join(self.cgroup_mntpnt, "cpuacct", "gpdb", "cpuacct.stat"), 0o400) - self.touch(os.path.join(self.cgroup_mntpnt, "memory", "memory.limit_in_bytes"), 0o400) - self.touch(os.path.join(self.cgroup_mntpnt, "memory", "memory.memsw.limit_in_bytes"), 0o400) - - os.mkdir(os.path.join(self.cgroup_mntpnt, "memory", "gpdb"), 0o700) - self.touch(os.path.join(self.cgroup_mntpnt, "memory", "gpdb", "memory.limit_in_bytes"), 0o600) - self.touch(os.path.join(self.cgroup_mntpnt, "memory", "gpdb", "memory.usage_in_bytes"), 0o400) - os.mkdir(os.path.join(self.cgroup_mntpnt, "cpuset", "gpdb"), 0o700) 
self.touch(os.path.join(self.cgroup_mntpnt, "cpuset", "gpdb", "cgroup.procs"), 0o600) self.touch(os.path.join(self.cgroup_mntpnt, "cpuset", "gpdb", "cpuset.cpus"), 0o600) @@ -80,7 +72,7 @@ def test_comp_lists(self): # however it is necessary to verify this unit test is up-to-date. comps = ['cpu', 'cpuacct'] if gpver.version >= [6, 0, 0]: - comps.extend(['cpuset', 'memory']) + comps.extend(['cpuset']) self.assertEqual(self.cgroup.required_comps(), comps) def test_comp_dirs_validation(self): @@ -119,21 +111,6 @@ def test_comp_dirs_validation_when_cpuset_gpdb_dir_missing(self): else: self.assertTrue(self.cgroup.validate_comp_dirs()) - def test_comp_dirs_validation_when_memory_gpdb_dir_bad_permission(self): - os.chmod(os.path.join(self.cgroup_mntpnt, "memory", "gpdb"), 0o100) - if gpver.version >= [6, 0, 0]: - self.assertFalse(self.cgroup.validate_comp_dirs()) - else: - self.assertTrue(self.cgroup.validate_comp_dirs()) - os.chmod(os.path.join(self.cgroup_mntpnt, "memory", "gpdb"), 0o700) - - def test_comp_dirs_validation_when_memory_gpdb_dir_missing(self): - shutil.rmtree(os.path.join(self.cgroup_mntpnt, "memory", "gpdb")) - if gpver.version >= [6, 0, 0]: - self.assertFalse(self.cgroup.validate_comp_dirs()) - else: - self.assertTrue(self.cgroup.validate_comp_dirs()) - def test_proper_setup(self): self.cgroup.validate_all() @@ -247,66 +224,6 @@ def test_when_cpuacct_gpdb_cpuacct_stat_bad_permission(self): with self.assertRaisesRegex(AssertionError, "file '.*/cpuacct/gpdb/cpuacct.stat' permission denied: require permission 'r'"): self.cgroup.validate_all() - def test_when_memory_limit_in_bytes_missing(self): - os.unlink(os.path.join(self.cgroup_mntpnt, "memory", "memory.limit_in_bytes")) - with self.assertRaisesRegex(AssertionError, "file '.*/memory/memory.limit_in_bytes' does not exist"): - self.cgroup.validate_all() - - def test_when_memory_limit_in_bytes_bad_permission(self): - os.chmod(os.path.join(self.cgroup_mntpnt, "memory", "memory.limit_in_bytes"), 0o100) - with 
self.assertRaisesRegex(AssertionError, "file '.*/memory/memory.limit_in_bytes' permission denied: require permission 'r'"): - self.cgroup.validate_all() - - def test_when_memory_gpdb_dir_missing(self): - shutil.rmtree(os.path.join(self.cgroup_mntpnt, "memory", "gpdb")) - if gpver.version >= [6, 0, 0]: - with self.assertRaisesRegex(AssertionError, "directory '.*/memory/gpdb/' does not exist"): - self.cgroup.validate_all() - else: - self.cgroup.validate_all() - - def test_when_memory_gpdb_dir_bad_permission(self): - os.chmod(os.path.join(self.cgroup_mntpnt, "memory", "gpdb"), 0o500) - if gpver.version >= [6, 0, 0]: - with self.assertRaisesRegex(AssertionError, "directory '.*/memory/gpdb/' permission denied: require permission 'rwx'"): - self.cgroup.validate_all() - else: - self.cgroup.validate_all() - # restore permission for the dir to be removed in tearDown() - os.chmod(os.path.join(self.cgroup_mntpnt, "memory", "gpdb"), 0o700) - - def test_when_memory_gpdb_limit_in_bytes_missing(self): - os.unlink(os.path.join(self.cgroup_mntpnt, "memory", "gpdb", "memory.limit_in_bytes")) - if gpver.version >= [6, 0, 0]: - with self.assertRaisesRegex(AssertionError, "file '.*/memory/gpdb/memory.limit_in_bytes' does not exist"): - self.cgroup.validate_all() - else: - self.cgroup.validate_all() - - def test_when_memory_gpdb_limit_in_bytes_bad_permission(self): - os.chmod(os.path.join(self.cgroup_mntpnt, "memory", "gpdb", "memory.limit_in_bytes"), 0o100) - if gpver.version >= [6, 0, 0]: - with self.assertRaisesRegex(AssertionError, "file '.*/memory/gpdb/memory.limit_in_bytes' permission denied: require permission 'rw'"): - self.cgroup.validate_all() - else: - self.cgroup.validate_all() - - def test_when_memory_gpdb_usage_in_bytes_missing(self): - os.unlink(os.path.join(self.cgroup_mntpnt, "memory", "gpdb", "memory.usage_in_bytes")) - if gpver.version >= [6, 0, 0]: - with self.assertRaisesRegex(AssertionError, "file '.*/memory/gpdb/memory.usage_in_bytes' does not exist"): - 
self.cgroup.validate_all() - else: - self.cgroup.validate_all() - - def test_when_memory_gpdb_usage_in_bytes_bad_permission(self): - os.chmod(os.path.join(self.cgroup_mntpnt, "memory", "gpdb", "memory.usage_in_bytes"), 0o100) - if gpver.version >= [6, 0, 0]: - with self.assertRaisesRegex(AssertionError, "file '.*/memory/gpdb/memory.usage_in_bytes' permission denied: require permission 'r'"): - self.cgroup.validate_all() - else: - self.cgroup.validate_all() - def test_when_cpuset_gpdb_dir_missing(self): shutil.rmtree(os.path.join(self.cgroup_mntpnt, "cpuset", "gpdb")) if gpver.version >= [6, 0, 0]: diff --git a/gpMgmt/test/behave/mgmt_utils/gpconfig.feature b/gpMgmt/test/behave/mgmt_utils/gpconfig.feature index 04715babb0d..f3a505626a4 100644 --- a/gpMgmt/test/behave/mgmt_utils/gpconfig.feature +++ b/gpMgmt/test/behave/mgmt_utils/gpconfig.feature @@ -67,7 +67,6 @@ Feature: gpconfig integration tests Examples: | test_case | guc | type | seed_value | value | file_value | live_value | value_coordinator_only | file_value_coordinator_only | value_coordinator | file_value_coordinator | live_value_coordinator | | bool | log_connections | bool | off | on | on | on | off | off | off | off | off | - | enum | gp_resgroup_memory_policy | enum | eager_free | auto | auto | auto | eager_free | eager_free | eager_free | eager_free | eager_free | | integer | vacuum_cost_limit | integer | 300 | 400 | 400 | 400 | 555 | 555 | 500 | 500 | 500 | | integer with memory unit | statement_mem | int w/unit | 123MB | 500MB | 500MB | 500MB | 500MB | 500MB | 500MB | 500MB | 500MB | | integer with time unit | statement_timeout | int w/unit | 1min | 5min | 5min | 5min | 5min | 5min | 5min | 5min | 5min | @@ -117,7 +116,6 @@ Feature: gpconfig integration tests Examples: | guc | type | seed_value | value | file_value | live_value | | log_connections | bool | off | on | on | on | - | gp_resgroup_memory_policy | enum | eager_free | auto | auto | auto | | vacuum_cost_limit | integer | 300 | 400 | 400 | 
400 | | checkpoint_completion_target | real | 0.4 | 0.5 | 0.5 | 0.5 | | application_name | string | xxxxxx | bodhi | bodhi | bodhi | @@ -174,7 +172,6 @@ Feature: gpconfig integration tests Examples: | guc | type | value | | log_connections | bool | off | - | gp_resgroup_memory_policy | enum | eager_free | | vacuum_cost_limit | integer | 300 | | checkpoint_completion_target | real | 0.4 | | application_name | string | bengie | diff --git a/src/backend/utils/resgroup/cgroup-ops-dummy.c b/src/backend/utils/resgroup/cgroup-ops-dummy.c index ab7a6897ce4..186a18e7c60 100644 --- a/src/backend/utils/resgroup/cgroup-ops-dummy.c +++ b/src/backend/utils/resgroup/cgroup-ops-dummy.c @@ -290,13 +290,6 @@ static CGroupOpsRoutine cGroupOpsRoutineDummy = { .setcpulimit = setcpulimit_dummy, .getcpuusage = getcpuusage_dummy, - - .gettotalmemory = gettotalmemory_dummy, - .getmemoryusage = getmemoryusage_dummy, - .setmemorylimit = setmemorylimit_dummy, - .getmemorylimitchunks = getmemorylimitchunks_dummy, - .setmemorylimitbychunks = setmemorylimitbychunks_dummy, - .getcpuset = getcpuset_dummy, .setcpuset = setcpuset_dummy, diff --git a/src/backend/utils/resgroup/resgroup.c b/src/backend/utils/resgroup/resgroup.c index 7994fa3064d..1aa1ba4fa6c 100644 --- a/src/backend/utils/resgroup/resgroup.c +++ b/src/backend/utils/resgroup/resgroup.c @@ -801,6 +801,10 @@ ResGroupAlterOnCommit(const ResourceGroupCallbackContext *callbackCtx) cpuset); } } + else if (callbackCtx->limittype == RESGROUP_LIMIT_TYPE_CONCURRENCY) + { + wakeupSlots(group, true); + } /* reset default group if cpuset has changed */ if (strcmp(callbackCtx->oldCaps.cpuset, callbackCtx->caps.cpuset) && gp_resource_group_enable_cgroup_cpuset) @@ -1521,10 +1525,7 @@ AssignResGroupOnMaster(void) if (shouldBypassQuery(debug_query_string)) { /* - * Although we decide to bypass this query we should load the - * memory_spill_ratio setting from the resgroup, otherwise a - * `SHOW memory_spill_ratio` command will output the default value 
20 - * if it's the first query in the connection (make sure tab completion + * If it's the first query in the connection (make sure tab completion * is not triggered otherwise it will run some implicit query before * you execute the SHOW command). * diff --git a/src/include/utils/sync_guc_name.h b/src/include/utils/sync_guc_name.h index aded0b2a2e9..415e248ed5a 100644 --- a/src/include/utils/sync_guc_name.h +++ b/src/include/utils/sync_guc_name.h @@ -62,7 +62,6 @@ "gp_interconnect_type", "gp_log_endpoints", "gp_log_interconnect", - "gp_log_resgroup_memory", "gp_log_resqueue_memory", "gp_log_stack_trace_lines", "gp_log_suboverflow_statement", diff --git a/src/include/utils/unsync_guc_name.h b/src/include/utils/unsync_guc_name.h index 0a3dce89896..1604d812d83 100644 --- a/src/include/utils/unsync_guc_name.h +++ b/src/include/utils/unsync_guc_name.h @@ -383,7 +383,6 @@ "max_wal_senders", "max_wal_size", "max_worker_processes", - "memory_spill_ratio", "min_dynamic_shared_memory", "min_parallel_index_scan_size", "min_parallel_table_scan_size", diff --git a/src/test/isolation2/expected/resgroup/resgroup_alter_concurrency.out b/src/test/isolation2/expected/resgroup/resgroup_alter_concurrency.out index 78456a883a7..639358fb54b 100644 --- a/src/test/isolation2/expected/resgroup/resgroup_alter_concurrency.out +++ b/src/test/isolation2/expected/resgroup/resgroup_alter_concurrency.out @@ -5,7 +5,7 @@ DROP DROP RESOURCE GROUP rg_concurrency_test; ERROR: resource group "rg_concurrency_test" does not exist -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_hard_quota_limit=20, memory_limit=60, memory_shared_quota=0, memory_spill_ratio=10); +CREATE RESOURCE GROUP rg_concurrency_test WITH(concurrency=1, cpu_hard_quota_limit=20); CREATE CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; CREATE @@ -39,196 +39,6 @@ SET ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 2; ALTER -SELECT * FROM rg_activity_status; - rsgname | 
wait_event_type | state | query ----------------------+-----------------+---------------------+-------- - rg_concurrency_test | Client | idle in transaction | BEGIN; - rg_concurrency_test | ResourceGroup | active | BEGIN; - rg_concurrency_test | ResourceGroup | active | BEGIN; -(3 rows) - -11:END; -END -11q: ... -21<: <... completed> -BEGIN -22<: <... completed> -BEGIN - -SELECT * FROM rg_activity_status; - rsgname | wait_event_type | state | query ----------------------+-----------------+---------------------+-------- - rg_concurrency_test | Client | idle in transaction | BEGIN; - rg_concurrency_test | Client | idle in transaction | BEGIN; -(2 rows) - -21:END; -END -22:END; -END -21q: ... -22q: ... - -SELECT * FROM rg_activity_status; - rsgname | wait_event_type | state | query ----------+-----------------+-------+------- -(0 rows) - --- --- 2. increase concurrency before pending queries --- - -ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 1; -ALTER - -11:SET ROLE role_concurrency_test; -SET -11:BEGIN; -BEGIN - -ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 2; -ALTER - -21:SET ROLE role_concurrency_test; -SET -22:SET ROLE role_concurrency_test; -SET -21&:BEGIN; -22&:BEGIN; - -SELECT * FROM rg_activity_status; - rsgname | wait_event_type | state | query ----------------------+-----------------+---------------------+-------- - rg_concurrency_test | Client | idle in transaction | BEGIN; - rg_concurrency_test | ResourceGroup | active | BEGIN; - rg_concurrency_test | ResourceGroup | active | BEGIN; -(3 rows) - -11:END; -END -11q: ... -21<: <... completed> -BEGIN -22<: <... completed> -BEGIN - -SELECT * FROM rg_activity_status; - rsgname | wait_event_type | state | query ----------------------+-----------------+---------------------+-------- - rg_concurrency_test | Client | idle in transaction | BEGIN; - rg_concurrency_test | Client | idle in transaction | BEGIN; -(2 rows) - -21:END; -END -22:END; -END -21q: ... -22q: ... 
- -SELECT * FROM rg_activity_status; - rsgname | wait_event_type | state | query ----------+-----------------+-------+------- -(0 rows) - --- --- 3. increase both concurrency & memory_shared_quota after pending queries --- - -ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 1; -ALTER -ALTER RESOURCE GROUP rg_concurrency_test SET MEMORY_SHARED_QUOTA 60; -ALTER - -11:SET ROLE role_concurrency_test; -SET -11:BEGIN; -BEGIN - -21:SET ROLE role_concurrency_test; -SET -22:SET ROLE role_concurrency_test; -SET -21&:BEGIN; -22&:BEGIN; - -ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 2; -ALTER - -SELECT * FROM rg_activity_status; - rsgname | wait_event_type | state | query ----------------------+-----------------+---------------------+-------- - rg_concurrency_test | Client | idle in transaction | BEGIN; - rg_concurrency_test | ResourceGroup | active | BEGIN; - rg_concurrency_test | ResourceGroup | active | BEGIN; -(3 rows) - -ALTER RESOURCE GROUP rg_concurrency_test SET MEMORY_SHARED_QUOTA 20; -ALTER - -21<: <... completed> -BEGIN - -SELECT * FROM rg_activity_status; - rsgname | wait_event_type | state | query ----------------------+-----------------+---------------------+-------- - rg_concurrency_test | Client | idle in transaction | BEGIN; - rg_concurrency_test | Client | idle in transaction | BEGIN; - rg_concurrency_test | ResourceGroup | active | BEGIN; -(3 rows) - -11:END; -END -11q: ... -22<: <... completed> -BEGIN - -SELECT * FROM rg_activity_status; - rsgname | wait_event_type | state | query ----------------------+-----------------+---------------------+-------- - rg_concurrency_test | Client | idle in transaction | BEGIN; - rg_concurrency_test | Client | idle in transaction | BEGIN; -(2 rows) - -21:END; -END -22:END; -END -21q: ... -22q: ... - -SELECT * FROM rg_activity_status; - rsgname | wait_event_type | state | query ----------+-----------------+-------+------- -(0 rows) - --- --- 4. 
increase both concurrency & memory_shared_quota before pending queries --- - -ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 1; -ALTER -ALTER RESOURCE GROUP rg_concurrency_test SET MEMORY_SHARED_QUOTA 60; -ALTER - -11:SET ROLE role_concurrency_test; -SET -11:BEGIN; -BEGIN - -ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 2; -ALTER -ALTER RESOURCE GROUP rg_concurrency_test SET MEMORY_SHARED_QUOTA 20; -ALTER - -21:SET ROLE role_concurrency_test; -SET -22:SET ROLE role_concurrency_test; -SET -21:BEGIN; -BEGIN -22&:BEGIN; - SELECT * FROM rg_activity_status; rsgname | wait_event_type | state | query ---------------------+-----------------+---------------------+-------- @@ -240,97 +50,8 @@ SELECT * FROM rg_activity_status; 11:END; END 11q: ... -22<: <... completed> -BEGIN - -SELECT * FROM rg_activity_status; - rsgname | wait_event_type | state | query ----------------------+-----------------+---------------------+-------- - rg_concurrency_test | Client | idle in transaction | BEGIN; - rg_concurrency_test | Client | idle in transaction | BEGIN; -(2 rows) - -21:END; -END -22:END; -END -21q: ... -22q: ... - -SELECT * FROM rg_activity_status; - rsgname | wait_event_type | state | query ----------+-----------------+-------+------- -(0 rows) - --- --- 5. 
increase both concurrency & memory_limit after pending queries --- - -ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 1; -ALTER -ALTER RESOURCE GROUP rg_concurrency_test SET MEMORY_SHARED_QUOTA 0; -ALTER -ALTER RESOURCE GROUP rg_concurrency_test SET MEMORY_LIMIT 30; -ALTER - --- proc 11 gets a quota of 30/1=30 -11:SET ROLE role_concurrency_test; -SET -11:BEGIN; -BEGIN - -21:SET ROLE role_concurrency_test; -SET -22:SET ROLE role_concurrency_test; -SET -21&:BEGIN; -22&:BEGIN; - -ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 2; -ALTER --- now a new query needs a quota of 30/2=15 to run, --- there is no free quota at the moment, so 21 & 22 are still pending -SELECT * FROM rg_activity_status; - rsgname | wait_event_type | state | query ----------------------+-----------------+---------------------+-------- - rg_concurrency_test | Client | idle in transaction | BEGIN; - rg_concurrency_test | ResourceGroup | active | BEGIN; - rg_concurrency_test | ResourceGroup | active | BEGIN; -(3 rows) - -ALTER RESOURCE GROUP rg_concurrency_test SET MEMORY_LIMIT 50; -ALTER --- now a new query needs a quota of 50/2=25 to run, --- but there is only 50-30=20 free quota, so 21 & 22 are still pending -SELECT * FROM rg_activity_status; - rsgname | wait_event_type | state | query ----------------------+-----------------+---------------------+-------- - rg_concurrency_test | Client | idle in transaction | BEGIN; - rg_concurrency_test | ResourceGroup | active | BEGIN; - rg_concurrency_test | ResourceGroup | active | BEGIN; -(3 rows) - -ALTER RESOURCE GROUP rg_concurrency_test SET MEMORY_LIMIT 60; -ALTER --- now a new query needs a quota of 60/2=30 to run, --- and there is 60-30=30 free quota, so 21 gets executed and 22 is still pending - 21<: <... 
completed> BEGIN - -SELECT * FROM rg_activity_status; - rsgname | wait_event_type | state | query ----------------------+-----------------+---------------------+-------- - rg_concurrency_test | Client | idle in transaction | BEGIN; - rg_concurrency_test | Client | idle in transaction | BEGIN; - rg_concurrency_test | ResourceGroup | active | BEGIN; -(3 rows) - -11:END; -END --- 11 releases its quota, so there is now 30 free quota, --- so 22 gets executed -11q: ... 22<: <... completed> BEGIN @@ -354,17 +75,12 @@ SELECT * FROM rg_activity_status; (0 rows) -- --- 6. increase both concurrency & memory_limit before pending queries +-- 2. increase concurrency before pending queries -- ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 1; ALTER -ALTER RESOURCE GROUP rg_concurrency_test SET MEMORY_SHARED_QUOTA 0; -ALTER -ALTER RESOURCE GROUP rg_concurrency_test SET MEMORY_LIMIT 30; -ALTER --- proc 11 gets a quota of 30/1=30 11:SET ROLE role_concurrency_test; SET 11:BEGIN; @@ -372,19 +88,6 @@ BEGIN ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 2; ALTER --- now a new query needs a quota of 30/2=15 to run, --- there is no free quota at the moment -SELECT * FROM rg_activity_status; - rsgname | wait_event_type | state | query ----------------------+-----------------+---------------------+-------- - rg_concurrency_test | Client | idle in transaction | BEGIN; -(1 row) - -ALTER RESOURCE GROUP rg_concurrency_test SET MEMORY_LIMIT 60; -ALTER --- now a new query needs a quota of 60/2=30 to run, --- and there is 60-30=30 free quota, --- so one new query can get executed immediately 21:SET ROLE role_concurrency_test; SET @@ -392,8 +95,6 @@ SET SET 21:BEGIN; BEGIN --- proc 21 gets executed, there is no free quota now, --- so proc 22 is pending 22&:BEGIN; SELECT * FROM rg_activity_status; @@ -406,8 +107,6 @@ SELECT * FROM rg_activity_status; 11:END; END --- 11 releases its quota, so there is now 30 free quota, --- so 22 gets executed 11q: ... 22<: <... 
completed> BEGIN @@ -432,12 +131,8 @@ SELECT * FROM rg_activity_status; (0 rows) -- --- 7. decrease concurrency +-- 3. decrease concurrency -- -ALTER RESOURCE GROUP rg_concurrency_test SET MEMORY_LIMIT 50; -ALTER -ALTER RESOURCE GROUP rg_concurrency_test SET MEMORY_SHARED_QUOTA 0; -ALTER ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 10; ALTER 11:SET ROLE role_concurrency_test; @@ -484,13 +179,13 @@ SELECT pg_sleep(1); -- end_ignore -- --- 8. increase concurrency from 0 +-- 4. increase concurrency from 0 -- DROP ROLE role_concurrency_test; DROP DROP RESOURCE GROUP rg_concurrency_test; DROP -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=0, cpu_hard_quota_limit=20, memory_limit=60, memory_shared_quota=0, memory_spill_ratio=10); +CREATE RESOURCE GROUP rg_concurrency_test WITH(concurrency=0, cpu_hard_quota_limit=20); CREATE CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; CREATE @@ -520,7 +215,7 @@ END 11q: ... -- --- 9.1 decrease concurrency to 0, +-- 5.1 decrease concurrency to 0, -- without running queries, -- without pending queries. -- @@ -540,7 +235,7 @@ SELECT * FROM rg_activity_status; (0 rows) -- --- 9.2 decrease concurrency to 0, +-- 5.2 decrease concurrency to 0, -- with running queries, -- without pending queries. -- @@ -575,7 +270,7 @@ END 11q: ... -- --- 9.3 decrease concurrency to 0, +-- 5.3 decrease concurrency to 0, -- with running queries, -- with pending queries. 
-- @@ -632,7 +327,7 @@ SELECT * FROM rg_activity_status; ---------+-----------------+-------+------- (0 rows) --- 10: drop a resgroup with concurrency=0 and pending queries +-- 6: drop a resgroup with concurrency=0 and pending queries DROP ROLE IF EXISTS role_concurrency_test; DROP -- start_ignore @@ -640,7 +335,7 @@ DROP RESOURCE GROUP rg_concurrency_test; DROP -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=0, cpu_hard_quota_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=0, cpu_hard_quota_limit=20); CREATE CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; CREATE @@ -659,7 +354,7 @@ BEGIN END 61q: ... --- 11: drop a role with concurrency=0 and pending queries +-- 7: drop a role with concurrency=0 and pending queries DROP ROLE IF EXISTS role_concurrency_test; DROP -- start_ignore @@ -667,7 +362,7 @@ DROP RESOURCE GROUP rg_concurrency_test; ERROR: resource group "rg_concurrency_test" does not exist -- end_ignore -CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=0, cpu_hard_quota_limit=20, memory_limit=20); +CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=0, cpu_hard_quota_limit=20); CREATE CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test; CREATE @@ -694,3 +389,4 @@ DROP DROP RESOURCE GROUP rg_concurrency_test; ERROR: resource group "rg_concurrency_test" does not exist -- end_ignore + diff --git a/src/test/isolation2/expected/resgroup/resgroup_concurrency.out b/src/test/isolation2/expected/resgroup/resgroup_concurrency.out index d691c034e1b..6c2f87d5223 100644 --- a/src/test/isolation2/expected/resgroup/resgroup_concurrency.out +++ b/src/test/isolation2/expected/resgroup/resgroup_concurrency.out @@ -105,7 +105,7 @@ ALTER SELECT r.rsgname, num_running, num_queueing, num_queued, num_executed FROM gp_toolkit.gp_resgroup_status s, pg_resgroup r WHERE s.groupid=r.oid AND r.rsgname='rg_concurrency_test'; rsgname | num_running | num_queueing | 
num_queued | num_executed ---------------------+-------------+--------------+------------+-------------- - rg_concurrency_test | 2 | 1 | 1 | 2 + rg_concurrency_test | 3 | 0 | 1 | 3 (1 row) SELECT concurrency FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_concurrency_test'; concurrency diff --git a/src/test/isolation2/expected/resgroup/resgroup_operator_memory.out b/src/test/isolation2/expected/resgroup/resgroup_operator_memory.out deleted file mode 100644 index ee0d64f6eea..00000000000 --- a/src/test/isolation2/expected/resgroup/resgroup_operator_memory.out +++ /dev/null @@ -1,286 +0,0 @@ -SET optimizer TO off; -SET - --- --- setup --- - ---start_ignore -DROP VIEW IF EXISTS many_ops; -DROP ROLE r1_opmem_test; -DROP RESOURCE GROUP rg1_opmem_test; -DROP RESOURCE GROUP rg2_opmem_test; -CREATE LANGUAGE plpython3u; ---end_ignore - --- a helper function to run query via SPI -CREATE OR REPLACE FUNCTION f1_opmem_test() RETURNS void AS $$ plpy.execute("""select * from gp_dist_random('gp_id')""") $$ LANGUAGE plpython3u; -CREATE - --- this view contains many operators in the plan, which is used to trigger --- the issue. gp_toolkit.gp_resgroup_config is a large JOIN view of many --- relations, to prevent the source relations being optimized out from the plan --- we have to keep the columns provided by them in the target list, instead of --- composing a long SELECT c1,c2,... list we use SELECT * here, but we should --- not output the groupid as it changes each time. --- --- hashagg may not work with a small operator memory, so we use UNION ALL --- instead of UNION to prevent putting a hashagg on top of the append node, --- and we use a always-false WHERE condition to prevent too much output. 
-CREATE OR REPLACE VIEW many_ops AS SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 UNION ALL SELECT * FROM gp_toolkit.gp_resgroup_config WHERE groupid=0 ; -CREATE - --- we must ensure spill to be small enough but still > 0. 
--- - rg1's memory quota is 682 * 1% = 6; --- - per-xact quota is 6/3=2; --- - spill memory is 2 * 60% = 1; -CREATE RESOURCE GROUP rg1_opmem_test WITH (cpu_hard_quota_limit=10, memory_limit=1, memory_shared_quota=0, concurrency=3, memory_spill_ratio=60); -CREATE - -CREATE ROLE r1_opmem_test RESOURCE GROUP rg1_opmem_test; -CREATE -GRANT ALL ON many_ops TO r1_opmem_test; -GRANT - --- rg1 has very low per-xact memory quota, there will be no enough operator --- memory reserved, however in resource group mode we assign at least 100KB to --- each operator, no matter it is memory intensive or not. As long as there is --- enough shared memory the query should be executed successfully. - --- --- positive: there is enough global shared memory --- - -SET gp_resgroup_memory_policy TO none; -SET -SET ROLE TO r1_opmem_test; -SET -SELECT * FROM many_ops; - groupid | groupname | concurrency | cpu_hard_quota_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ----------+-----------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- -(0 rows) -RESET role; -RESET - -SET gp_resgroup_memory_policy TO eager_free; -SET -SET ROLE TO r1_opmem_test; -SET -SELECT * FROM many_ops; - groupid | groupname | concurrency | cpu_hard_quota_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ----------+-----------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- -(0 rows) -RESET role; -RESET - -SET gp_resgroup_memory_policy TO auto; -SET -SET ROLE TO r1_opmem_test; -SET -SELECT * FROM many_ops; - groupid | groupname | concurrency | cpu_hard_quota_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ----------+-----------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- -(0 rows) -RESET role; 
-RESET - --- --- negative: there is not enough shared memory --- - --- rg1 has no group level shared memory, and most memory are granted to rg2, --- there is only very little global shared memory due to integer rounding. -CREATE RESOURCE GROUP rg2_opmem_test WITH (cpu_hard_quota_limit=10, memory_limit=59); -CREATE - --- this query can execute but will raise OOM error. - -SET gp_resgroup_memory_policy TO none; -SET -SET ROLE TO r1_opmem_test; -SET -SELECT * FROM many_ops; -ERROR: Out of memory -DETAIL: Resource group memory limit reached -RESET role; -RESET - -SET gp_resgroup_memory_policy TO eager_free; -SET -SET ROLE TO r1_opmem_test; -SET -SELECT * FROM many_ops; -ERROR: Out of memory -DETAIL: Resource group memory limit reached -RESET role; -RESET - -SET gp_resgroup_memory_policy TO auto; -SET -SET ROLE TO r1_opmem_test; -SET -SELECT * FROM many_ops; -ERROR: Out of memory -DETAIL: Resource group memory limit reached -RESET role; -RESET - --- --- positive: there is enough group shared memory --- - -ALTER RESOURCE GROUP rg2_opmem_test SET memory_limit 40; -ALTER -ALTER RESOURCE GROUP rg1_opmem_test SET memory_limit 20; -ALTER -ALTER RESOURCE GROUP rg1_opmem_test SET memory_shared_quota 100; -ALTER -ALTER RESOURCE GROUP rg1_opmem_test SET memory_spill_ratio 20; -ALTER - -SET gp_resgroup_memory_policy TO none; -SET -SET ROLE TO r1_opmem_test; -SET -SELECT * FROM many_ops; - groupid | groupname | concurrency | cpu_hard_quota_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ----------+-----------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- -(0 rows) -RESET role; -RESET - -SET gp_resgroup_memory_policy TO eager_free; -SET -SET ROLE TO r1_opmem_test; -SET -SELECT * FROM many_ops; - groupid | groupname | concurrency | cpu_hard_quota_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset 
----------+-----------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- -(0 rows) -RESET role; -RESET - -SET gp_resgroup_memory_policy TO auto; -SET -SET ROLE TO r1_opmem_test; -SET -SELECT * FROM many_ops; - groupid | groupname | concurrency | cpu_hard_quota_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ----------+-----------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- -(0 rows) -RESET role; -RESET - --- --- positive: the spill memory is large enough, no adjustment is needed --- - -DROP RESOURCE GROUP rg2_opmem_test; -DROP -ALTER RESOURCE GROUP rg1_opmem_test SET memory_limit 40; -ALTER -ALTER RESOURCE GROUP rg1_opmem_test SET memory_shared_quota 50; -ALTER -ALTER RESOURCE GROUP rg1_opmem_test SET memory_spill_ratio 30; -ALTER -ALTER RESOURCE GROUP rg1_opmem_test SET concurrency 1; -ALTER - -SET gp_resgroup_memory_policy TO none; -SET -SET ROLE TO r1_opmem_test; -SET -SELECT * FROM many_ops; - groupid | groupname | concurrency | cpu_hard_quota_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ----------+-----------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- -(0 rows) -RESET role; -RESET - -SET gp_resgroup_memory_policy TO eager_free; -SET -SET ROLE TO r1_opmem_test; -SET -SELECT * FROM many_ops; - groupid | groupname | concurrency | cpu_hard_quota_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ----------+-----------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- -(0 rows) -RESET role; -RESET - -SET gp_resgroup_memory_policy TO auto; -SET -SET ROLE TO r1_opmem_test; -SET -SELECT * FROM many_ops; - groupid | groupname | concurrency | cpu_hard_quota_limit | memory_limit 
| memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ----------+-----------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- -(0 rows) -RESET role; -RESET - --- --- positive: when spill memory is zero, work memory is used --- - -ALTER RESOURCE GROUP rg1_opmem_test SET memory_spill_ratio 0; -ALTER - -SET gp_resgroup_memory_policy TO none; -SET -SET ROLE TO r1_opmem_test; -SET -SELECT * FROM many_ops; - groupid | groupname | concurrency | cpu_hard_quota_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ----------+-----------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- -(0 rows) -SELECT f1_opmem_test(); - f1_opmem_test ---------------- - -(1 row) -RESET role; -RESET - -SET gp_resgroup_memory_policy TO eager_free; -SET -SET ROLE TO r1_opmem_test; -SET -SELECT * FROM many_ops; - groupid | groupname | concurrency | cpu_hard_quota_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ----------+-----------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- -(0 rows) -SELECT f1_opmem_test(); - f1_opmem_test ---------------- - -(1 row) -RESET role; -RESET - -SET gp_resgroup_memory_policy TO auto; -SET -SET ROLE TO r1_opmem_test; -SET -SELECT * FROM many_ops; - groupid | groupname | concurrency | cpu_hard_quota_limit | memory_limit | memory_shared_quota | memory_spill_ratio | memory_auditor | cpuset ----------+-----------+-------------+----------------+--------------+---------------------+--------------------+----------------+-------- -(0 rows) -SELECT f1_opmem_test(); - f1_opmem_test ---------------- - -(1 row) -RESET role; -RESET - --- --- cleanup --- - -DROP VIEW many_ops; -DROP -DROP ROLE r1_opmem_test; -DROP -DROP RESOURCE GROUP rg1_opmem_test; -DROP diff --git 
a/src/test/isolation2/expected/resgroup/resgroup_parallel_queries.out b/src/test/isolation2/expected/resgroup/resgroup_parallel_queries.out deleted file mode 100644 index 1b30bfe2ada..00000000000 --- a/src/test/isolation2/expected/resgroup/resgroup_parallel_queries.out +++ /dev/null @@ -1,598 +0,0 @@ --- start_matchsubs --- m/ERROR: tuple concurrently updated \(heapam\.c\:\d+\)/ --- s/\(heapam\.c:\d+\)// --- end_matchsubs -CREATE EXTENSION dblink; -CREATE - --- This function execute commands N times. --- % in command will be replaced by number specified by range1 sequentially --- # in command will be replaced by number specified by range2 randomly --- range, eg: 1-10 --- Notice: now it only support SELECT statement return single integer -CREATE or replace FUNCTION exec_commands_n /*in func*/ (dl_name text, command1 text, /*in func*/ command2 text, command3 text, /*in func*/ times integer, range1 text, range2 text, fail_on_error bool) /*in func*/ RETURNS integer AS $$ /*in func*/ DECLARE /*in func*/ cmd text; /*in func*/ res int; /*in func*/ s_r1 int; /*in func*/ e_r1 int; /*in func*/ s_r2 int; /*in func*/ e_r2 int; /*in func*/ BEGIN /*in func*/ s_r1 = 0; /*in func*/ e_r1 = 0; /*in func*/ s_r2 = 0; /*in func*/ e_r2 = 0; /*in func*/ IF length(range1) > 0 THEN /*in func*/ select t[1]::int, t[2]::int into s_r1, e_r1 from regexp_split_to_array(range1, '-') t; /*in func*/ END IF; /*in func*/ IF length(range2) > 0 THEN /*in func*/ select t[1]::int, t[2]::int into s_r2, e_r2 from regexp_split_to_array(range2, '-') t; /*in func*/ END IF; /*in func*/ FOR i IN 0..(times - 1) LOOP /*in func*/ IF length(command1) > 0 THEN /*in func*/ cmd = regexp_replace(command1, '%', (s_r1 + i % (e_r1 - s_r1 + 1))::text, 'g'); /*in func*/ cmd = regexp_replace(cmd, '#', (s_r2 + ((random()*100)::int) % (e_r2 - s_r2 + 1))::text, 'g'); /*in func*/ RAISE NOTICE '%', cmd; /*in func*/ IF lower(cmd) like 'select %' THEN /*in func*/ select * into res from dblink(dl_name, cmd, fail_on_error) t(c1 
integer); /*in func*/ ELSE /*in func*/ perform dblink_exec(dl_name, cmd , fail_on_error); /*in func*/ END IF; /*in func*/ END IF; /*in func*/ IF length(command2) > 0 THEN /*in func*/ cmd = regexp_replace(command2, '%', (s_r1 + i % (e_r1 - s_r1 + 1))::text, 'g'); /*in func*/ cmd = regexp_replace(cmd, '#', (s_r2 + ((random()*100)::int) % (e_r2 - s_r2 + 1))::text, 'g'); /*in func*/ RAISE NOTICE '%', cmd; /*in func*/ IF lower(cmd) like 'select %' THEN /*in func*/ select * into res from dblink(dl_name, cmd, fail_on_error) t(c1 integer); /*in func*/ ELSE /*in func*/ perform dblink_exec(dl_name, cmd, fail_on_error); /*in func*/ END IF; /*in func*/ END IF; /*in func*/ IF length(command3) > 0 THEN /*in func*/ cmd = regexp_replace(command3, '%', (s_r1 + i % (e_r1 - s_r1 + 1))::text, 'g'); /*in func*/ cmd = regexp_replace(cmd, '#', (s_r2 + ((random()*100)::int) % (e_r2 - s_r2 + 1))::text, 'g'); /*in func*/ RAISE NOTICE '%', cmd; /*in func*/ IF lower(cmd) like 'select %' THEN /*in func*/ select * into res from dblink(dl_name, cmd, fail_on_error) t(c1 integer); /*in func*/ ELSE /*in func*/ perform dblink_exec(dl_name, cmd, fail_on_error); /*in func*/ END IF; /*in func*/ END IF; /*in func*/ END LOOP; /*in func*/ return times; /*in func*/ END;$$ /*in func*/ LANGUAGE 'plpgsql'; -CREATE - --- --- DDLs vs DDLs --- -1:select dblink_connect('dblink_rg_test1', 'dbname=isolation2resgrouptest'); - dblink_connect ----------------- - OK -(1 row) -2:select dblink_connect('dblink_rg_test2', 'dbname=isolation2resgrouptest'); - dblink_connect ----------------- - OK -(1 row) -3:select dblink_connect('dblink_rg_test3', 'dbname=isolation2resgrouptest'); - dblink_connect ----------------- - OK -(1 row) -4:select dblink_connect('dblink_rg_test4', 'dbname=isolation2resgrouptest'); - dblink_connect ----------------- - OK -(1 row) -5:select dblink_connect('dblink_rg_test5', 'dbname=isolation2resgrouptest'); - dblink_connect ----------------- - OK -(1 row) -6:select dblink_connect('dblink_rg_test6', 
'dbname=isolation2resgrouptest'); - dblink_connect ----------------- - OK -(1 row) - -1>:select exec_commands_n('dblink_rg_test1','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, cpu_hard_quota_limit=#, memory_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER RESOURCE GROUP rg_test_g# set concurrency #', 60, '', '1-6', false); -2>:select exec_commands_n('dblink_rg_test2','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, cpu_hard_quota_limit=#, memory_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER RESOURCE GROUP rg_test_g# set concurrency#', 60, '', '1-6', false); -3>:select exec_commands_n('dblink_rg_test3','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, cpu_hard_quota_limit=#, memory_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER RESOURCE GROUP rg_test_g# set cpu_hard_quota_limit #', 60, '', '1-6', false); -4>:select exec_commands_n('dblink_rg_test4','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, cpu_hard_quota_limit=#, memory_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER RESOURCE GROUP rg_test_g# set memory_limit #', 60, '', '1-6', false); -5>:select exec_commands_n('dblink_rg_test5','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, cpu_hard_quota_limit=#, memory_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER RESOURCE GROUP rg_test_g# set memory_shared_quota #', 60, '', '1-6', false); -6>:select exec_commands_n('dblink_rg_test6','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, cpu_hard_quota_limit=#, memory_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER RESOURCE GROUP rg_test_g# set memory_limit #', 60, '', '1-6', false); - -1<: <... completed> - exec_commands_n ------------------ - 60 -(1 row) -2<: <... completed> - exec_commands_n ------------------ - 60 -(1 row) -3<: <... completed> - exec_commands_n ------------------ - 60 -(1 row) -4<: <... completed> - exec_commands_n ------------------ - 60 -(1 row) -5<: <... completed> - exec_commands_n ------------------ - 60 -(1 row) -6<: <... 
completed> - exec_commands_n ------------------ - 60 -(1 row) - -1: select dblink_disconnect('dblink_rg_test1'); - dblink_disconnect -------------------- - OK -(1 row) -2: select dblink_disconnect('dblink_rg_test2'); - dblink_disconnect -------------------- - OK -(1 row) -3: select dblink_disconnect('dblink_rg_test3'); - dblink_disconnect -------------------- - OK -(1 row) -4: select dblink_disconnect('dblink_rg_test4'); - dblink_disconnect -------------------- - OK -(1 row) -5: select dblink_disconnect('dblink_rg_test5'); - dblink_disconnect -------------------- - OK -(1 row) -6: select dblink_disconnect('dblink_rg_test6'); - dblink_disconnect -------------------- - OK -(1 row) - - -1q: ... -2q: ... -3q: ... -4q: ... -5q: ... -6q: ... --- --- DDLs vs DMLs --- --- Prepare resource groups and roles and tables -create table rg_test_foo as select i as c1, i as c2 from generate_series(1,1000) i; -CREATE 1000 -create table rg_test_bar as select i as c1, i as c2 from generate_series(1,1000) i; -CREATE 1000 -grant all on rg_test_foo to public; -GRANT -grant all on rg_test_bar to public; -GRANT - --- start_ignore -select dblink_connect('dblink_rg_test', 'dbname=isolation2resgrouptest'); - dblink_connect ----------------- - OK -(1 row) -select exec_commands_n('dblink_rg_test','DROP ROLE rg_test_r%', '', '', 7, '1-7', '', false); - exec_commands_n ------------------ - 7 -(1 row) -select exec_commands_n('dblink_rg_test','DROP RESOURCE GROUP rg_test_g%', '', '', 7, '1-7', '', false); - exec_commands_n ------------------ - 7 -(1 row) --- end_ignore - --- create 6 roles and 6 resource groups -select exec_commands_n('dblink_rg_test','CREATE RESOURCE GROUP rg_test_g% WITH (concurrency=9, cpu_hard_quota_limit=1, memory_limit=7)', '', '', 6, '1-6', '', true); - exec_commands_n ------------------ - 6 -(1 row) -select exec_commands_n('dblink_rg_test','CREATE ROLE rg_test_r% login resource group rg_test_g%;', '', '', 6, '1-6', '', true); - exec_commands_n ------------------ - 6 -(1 
row) -select exec_commands_n('dblink_rg_test','GRANT ALL ON rg_test_foo to rg_test_r%;', '', '', 6, '1-6', '', true); - exec_commands_n ------------------ - 6 -(1 row) -select exec_commands_n('dblink_rg_test','GRANT ALL ON rg_test_bar to rg_test_r%;', '', '', 6, '1-6', '', true); - exec_commands_n ------------------ - 6 -(1 row) - -select dblink_disconnect('dblink_rg_test'); - dblink_disconnect -------------------- - OK -(1 row) - -select groupname, concurrency, cpu_hard_quota_limit from gp_toolkit.gp_resgroup_config where groupname like 'rg_test_g%' order by groupname; - groupname | concurrency | cpu_hard_quota_limit -------------+-------------+---------------- - rg_test_g1 | 9 | 1 - rg_test_g2 | 9 | 1 - rg_test_g3 | 9 | 1 - rg_test_g4 | 9 | 1 - rg_test_g5 | 9 | 1 - rg_test_g6 | 9 | 1 -(6 rows) - --- --- 2* : DMLs --- --- start 6 session to concurrently change resource group and run simple queries randomly --- BEGIN/END -21: select dblink_connect('dblink_rg_test21', 'dbname=isolation2resgrouptest'); - dblink_connect ----------------- - OK -(1 row) -21>: select exec_commands_n('dblink_rg_test21', 'set role rg_test_r#', 'BEGIN', 'END', 24000, '', '1-6', true); --- BEGIN/ABORT -22: select dblink_connect('dblink_rg_test22', 'dbname=isolation2resgrouptest'); - dblink_connect ----------------- - OK -(1 row) -22>: select exec_commands_n('dblink_rg_test22', 'set role rg_test_r#', 'BEGIN', 'ABORT', 24000, '', '1-6', true); --- query with memory sensitive node -23: select dblink_connect('dblink_rg_test23', 'dbname=isolation2resgrouptest'); - dblink_connect ----------------- - OK -(1 row) -23>: select exec_commands_n('dblink_rg_test23', 'set role rg_test_r#', 'insert into rg_test_foo values (#, #)', 'select count(*) from rg_test_bar t1, rg_test_foo t2 where t1.c2=t2.c2 group by t1.c2', 3000, '', '1-6', true); --- high cpu -24: select dblink_connect('dblink_rg_test24', 'dbname=isolation2resgrouptest'); - dblink_connect ----------------- - OK -(1 row) -24>: select 
exec_commands_n('dblink_rg_test24', 'set role rg_test_r#', 'insert into rg_test_bar values (#, #)', 'select count(*) from rg_test_bar where c2! = 1000', 60, '', '1-6', true); --- simple select -25: select dblink_connect('dblink_rg_test25', 'dbname=isolation2resgrouptest'); - dblink_connect ----------------- - OK -(1 row) -25>: select exec_commands_n('dblink_rg_test25', 'set role rg_test_r#', 'select count(*) from rg_test_foo', 'select count(*) from rg_test_bar', 6000, '', '1-6', true); --- vacuum -26: select dblink_connect('dblink_rg_test26', 'dbname=isolation2resgrouptest'); - dblink_connect ----------------- - OK -(1 row) -26>: select exec_commands_n('dblink_rg_test26', 'set role rg_test_r#', 'vacuum rg_test_bar', 'vacuum rg_test_foo', 6000, '', '1-6', true); - --- --- 3* : Alter groups --- --- start a new session to alter concurrency randomly -31: select dblink_connect('dblink_rg_test31', 'dbname=isolation2resgrouptest'); - dblink_connect ----------------- - OK -(1 row) -31>: select exec_commands_n('dblink_rg_test31', 'alter resource group rg_test_g% set concurrency #', 'select 1 from pg_sleep(0.1)', '', 1000, '1-6', '0-5', true); - --- start a new session to alter cpu_hard_quota_limit randomly -32: select dblink_connect('dblink_rg_test32', 'dbname=isolation2resgrouptest'); - dblink_connect ----------------- - OK -(1 row) -32>: select exec_commands_n('dblink_rg_test32', 'alter resource group rg_test_g% set cpu_hard_quota_limit #', 'select 1 from pg_sleep(0.1)', '', 1000, '1-6', '1-6', true); - --- start a new session to alter memory_limit randomly -33: select dblink_connect('dblink_rg_test33', 'dbname=isolation2resgrouptest'); - dblink_connect ----------------- - OK -(1 row) -33>: select exec_commands_n('dblink_rg_test33', 'alter resource group rg_test_g% set memory_limit #', 'select 1 from pg_sleep(0.1)', '', 1000, '1-6', '1-7', true); - --- start a new session to alter memory_shared_quota randomly -34: select dblink_connect('dblink_rg_test34', 
'dbname=isolation2resgrouptest'); - dblink_connect ----------------- - OK -(1 row) -34>: select exec_commands_n('dblink_rg_test34', 'alter resource group rg_test_g% set memory_shared_quota #', 'select 1 from pg_sleep(0.1)', '', 1000, '1-6', '1-80', true); - --- --- 4* : CREATE/DROP tables & groups --- --- start a new session to create and drop table, it will cause massive catchup interrupt. -41: select dblink_connect('dblink_rg_test41', 'dbname=isolation2resgrouptest'); - dblink_connect ----------------- - OK -(1 row) -41>: select exec_commands_n('dblink_rg_test41', 'drop table if exists rg_test_t%', 'create table rg_test_t% (c1 int, c2 int)' ,'', 3000, '1-6', '', true); - --- start a new session to create & drop resource group -42: select dblink_connect('dblink_rg_test42', 'dbname=isolation2resgrouptest'); - dblink_connect ----------------- - OK -(1 row) -42>: select exec_commands_n('dblink_rg_test42', 'create resource group rg_test_g7 with (cpu_hard_quota_limit=1, memory_limit=1)', 'drop resource group rg_test_g7', '', 1000, '', '', true); - -31<: <... completed> - exec_commands_n ------------------ - 1000 -(1 row) -31: select exec_commands_n('dblink_rg_test31', 'alter resource group rg_test_g% set concurrency #', 'select 1 from pg_sleep(0.1)', '', 6, '1-6', '1-5', true); - exec_commands_n ------------------ - 6 -(1 row) - --- start a new session to acquire the status of resource groups -44: select dblink_connect('dblink_rg_test44', 'dbname=isolation2resgrouptest'); - dblink_connect ----------------- - OK -(1 row) -44>: select exec_commands_n('dblink_rg_test44', 'select count(*) from gp_toolkit.gp_resgroup_status;', '', '', 100, '', '', true); - --- wait all sessions to finish -21<: <... completed> - exec_commands_n ------------------ - 24000 -(1 row) -22<: <... completed> - exec_commands_n ------------------ - 24000 -(1 row) -23<: <... completed> - exec_commands_n ------------------ - 3000 -(1 row) -24<: <... 
completed> - exec_commands_n ------------------ - 60 -(1 row) -25<: <... completed> - exec_commands_n ------------------ - 6000 -(1 row) -26<: <... completed> - exec_commands_n ------------------ - 6000 -(1 row) -32<: <... completed> - exec_commands_n ------------------ - 1000 -(1 row) -33<: <... completed> - exec_commands_n ------------------ - 1000 -(1 row) -34<: <... completed> - exec_commands_n ------------------ - 1000 -(1 row) -41<: <... completed> - exec_commands_n ------------------ - 3000 -(1 row) -42<: <... completed> - exec_commands_n ------------------ - 1000 -(1 row) -44<: <... completed> - exec_commands_n ------------------ - 100 -(1 row) - -21: select dblink_disconnect('dblink_rg_test21'); - dblink_disconnect -------------------- - OK -(1 row) -22: select dblink_disconnect('dblink_rg_test22'); - dblink_disconnect -------------------- - OK -(1 row) -23: select dblink_disconnect('dblink_rg_test23'); - dblink_disconnect -------------------- - OK -(1 row) -24: select dblink_disconnect('dblink_rg_test24'); - dblink_disconnect -------------------- - OK -(1 row) -25: select dblink_disconnect('dblink_rg_test25'); - dblink_disconnect -------------------- - OK -(1 row) -26: select dblink_disconnect('dblink_rg_test26'); - dblink_disconnect -------------------- - OK -(1 row) -31: select dblink_disconnect('dblink_rg_test31'); - dblink_disconnect -------------------- - OK -(1 row) -32: select dblink_disconnect('dblink_rg_test32'); - dblink_disconnect -------------------- - OK -(1 row) -33: select dblink_disconnect('dblink_rg_test33'); - dblink_disconnect -------------------- - OK -(1 row) -34: select dblink_disconnect('dblink_rg_test34'); - dblink_disconnect -------------------- - OK -(1 row) -41: select dblink_disconnect('dblink_rg_test41'); - dblink_disconnect -------------------- - OK -(1 row) -42: select dblink_disconnect('dblink_rg_test42'); - dblink_disconnect -------------------- - OK -(1 row) -44: select dblink_disconnect('dblink_rg_test44'); - 
dblink_disconnect -------------------- - OK -(1 row) - -21q: ... -22q: ... -23q: ... -24q: ... -25q: ... -26q: ... -31q: ... -32q: ... -33q: ... -34q: ... -41q: ... -42q: ... - -select groupname, concurrency::int < 7, cpu_hard_quota_limit::int < 7 from gp_toolkit.gp_resgroup_config where groupname like 'rg_test_g%' order by groupname; - groupname | ?column? | ?column? -------------+----------+---------- - rg_test_g1 | t | t - rg_test_g2 | t | t - rg_test_g3 | t | t - rg_test_g4 | t | t - rg_test_g5 | t | t - rg_test_g6 | t | t -(6 rows) - --- Because concurrency of each resource group is changed between 1..6, so the num_queued must be larger than 0 -select num_queued > 0 from gp_toolkit.gp_resgroup_status where rsgname like 'rg_test_g%' order by rsgname; - ?column? ----------- - t - t - t - t - t - t -(6 rows) - --- After all queries finished in each resource group, the memory_usage should be zero, no memory leak -with t_1 as ( select rsgname, row_to_json(json_each(memory_usage::json)) as j from gp_toolkit.gp_resgroup_status where rsgname like 'rg_test_g%' order by rsgname ) select rsgname, sum(((j->'value')->>'used')::int) from t_1 group by rsgname ; - rsgname | sum -------------+----- - rg_test_g1 | 0 - rg_test_g2 | 0 - rg_test_g3 | 0 - rg_test_g4 | 0 - rg_test_g5 | 0 - rg_test_g6 | 0 -(6 rows) - --- start_ignore -drop table rg_test_foo; -DROP -drop table rg_test_bar; -DROP -select dblink_connect('dblink_rg_test', 'dbname=isolation2resgrouptest'); - dblink_connect ----------------- - OK -(1 row) -select exec_commands_n('dblink_rg_test','DROP ROLE rg_test_r%', '', '', 6, '1-6', '', true); - exec_commands_n ------------------ - 6 -(1 row) -select exec_commands_n('dblink_rg_test','DROP RESOURCE GROUP rg_test_g%', '', '', 6, '1-6', '', true); - exec_commands_n ------------------ - 6 -(1 row) -select dblink_disconnect('dblink_rg_test'); - dblink_disconnect -------------------- - OK -(1 row) --- end_ignore - --- --- 5*: Test connections in utility mode are not governed 
by resource group --- -create resource group rg_test_g8 with (concurrency= 1, cpu_hard_quota_limit=1, memory_limit=1); -CREATE -create role rg_test_r8 login resource group rg_test_g8; -CREATE -51:select dblink_connect('dblink_rg_test51', 'dbname=isolation2resgrouptest user=rg_test_r8 options=''-c gp_role=utility'''); - dblink_connect ----------------- - OK -(1 row) -52:select dblink_connect('dblink_rg_test52', 'dbname=isolation2resgrouptest user=rg_test_r8 options=''-c gp_role=utility'''); - dblink_connect ----------------- - OK -(1 row) -53:select dblink_connect('dblink_rg_test53', 'dbname=isolation2resgrouptest user=rg_test_r8 options=''-c gp_role=utility'''); - dblink_connect ----------------- - OK -(1 row) - -51>:select exec_commands_n('dblink_rg_test51', 'select 1', 'begin', 'end', 100, '', '', true); -51<: <... completed> - exec_commands_n ------------------ - 100 -(1 row) -52>:select exec_commands_n('dblink_rg_test52', 'select 1', 'select 1', 'select 1', 100, '', '', true); -52<: <... completed> - exec_commands_n ------------------ - 100 -(1 row) -53>:select exec_commands_n('dblink_rg_test53', 'select 1', 'begin', 'abort', 100, '', '', true); -53<: <... completed> - exec_commands_n ------------------ - 100 -(1 row) - -51: select dblink_disconnect('dblink_rg_test51'); - dblink_disconnect -------------------- - OK -(1 row) -52: select dblink_disconnect('dblink_rg_test52'); - dblink_disconnect -------------------- - OK -(1 row) -53: select dblink_disconnect('dblink_rg_test53'); - dblink_disconnect -------------------- - OK -(1 row) - -51q: ... -52q: ... -53q: ... 
- --- num_executed and num_queued must be zero -select num_queued, num_executed from gp_toolkit.gp_resgroup_status where rsgname = 'rg_test_g8'; - num_queued | num_executed -------------+-------------- - 0 | 0 -(1 row) -drop role rg_test_r8; -DROP -drop resource group rg_test_g8; -DROP - --- clean up -select * from gp_toolkit.gp_resgroup_config; - groupid | groupname | concurrency | proposed_concurrency | cpu_hard_quota_limit | memory_limit | proposed_memory_limit | memory_shared_quota | proposed_memory_shared_quota | memory_spill_ratio | proposed_memory_spill_ratio ----------+---------------+-------------+----------------------+----------------+--------------+-----------------------+---------------------+------------------------------+--------------------+----------------------------- - 6437 | default_group | 20 | 20 | 30 | 30 | 30 | 50 | 50 | 20 | 20 - 6438 | admin_group | 40 | 40 | 10 | 10 | 10 | 50 | 50 | 20 | 20 -(2 rows) diff --git a/src/test/isolation2/expected/resgroup/resgroup_query_mem.out b/src/test/isolation2/expected/resgroup/resgroup_query_mem.out deleted file mode 100644 index d05f2bb968c..00000000000 --- a/src/test/isolation2/expected/resgroup/resgroup_query_mem.out +++ /dev/null @@ -1,50 +0,0 @@ --- This test is to verify that query_mem is set correctly in QEs. --- Previously, resgroup does not consider that different number of --- segments among coordinator and segments. Now we let QEs to re-calculate --- query_mem in each segment locally. This test case use the following --- steps to verify the new method's correctness: --- 1. fetch available memory in coordinator and a single segment, --- compute the ratio --- 2. use fault inject and plpython invokes pygresql with notice, --- get a distributed plan's sort's operator memory in a QE --- 3. Get sort's operator memory in a pure QD's plan (catalog order by) --- 4. compute the ratio of two operator memorys --- 5. these two ratios should be the same. 
- -create extension if not exists gp_inject_fault; -CREATE -create or replace language plpython3u; -CREATE - -create table t_qmem(a int); -CREATE -select gp_inject_fault('rg_qmem_qd_qe', 'skip', dbid) from gp_segment_configuration where role = 'p' and content = 0; - gp_inject_fault ------------------ - Success: -(1 row) - -create function rg_qmem_test() returns boolean as $$ from pg import DB from copy import deepcopy import re -# 1: get resgroup available mem in QD and QE and compute ratio sql = ("select memory_available m from " "gp_toolkit.gp_resgroup_status_per_segment " "where segment_id = %d and rsgname = 'admin_group'") qd_mem = int(plpy.execute(sql % -1)[0]["m"]) qe_mem = int(plpy.execute(sql % 0)[0]["m"]) ratio1 = int(round(float(qd_mem) / qe_mem)) -# 2. use notice to get qe operator mem dbname = plpy.execute("select current_database() db")[0]["db"] db = DB(dbname=dbname) qe_opmem_info = [] db.set_notice_receiver(lambda n: qe_opmem_info.append(deepcopy(n.message))) sql = "select * from t_qmem order by 1" db.query(sql) qe_opmem = int(re.findall(r"op_mem=(\d+)", qe_opmem_info[0])[0]) db.set_notice_receiver(None) -# 3. 
get qd operator mem sql = "explain analyze select * from pg_class order by relpages limit 10" db.query("set gp_resgroup_print_operator_memory_limits = on;") r = db.query(sql).getresult() for (line, ) in r: if "-> Sort" not in line: continue qd_opmem = int(re.findall(r"operatorMem: (\d+)", line)[0]) break -db.close() -ratio2 = int(round(float(qd_opmem) / qe_opmem)) -return ratio1 == ratio2 -$$ language plpython3u; -CREATE - -select rg_qmem_test(); - rg_qmem_test --------------- - t -(1 row) -select gp_inject_fault('rg_qmem_qd_qe', 'reset', dbid) from gp_segment_configuration where role = 'p' and content = 0; - gp_inject_fault ------------------ - Success: -(1 row) -drop function rg_qmem_test(); -DROP -drop table t_qmem; -DROP diff --git a/src/test/isolation2/expected/resgroup/restore_default_resgroup.out b/src/test/isolation2/expected/resgroup/restore_default_resgroup.out deleted file mode 100644 index eec558a3c2e..00000000000 --- a/src/test/isolation2/expected/resgroup/restore_default_resgroup.out +++ /dev/null @@ -1,70 +0,0 @@ --- enable resource group and restart cluster. --- start_ignore -! gpconfig -c gp_resource_group_cpu_limit -v 0.9; -20170830:00:35:08:440358 gpconfig:sdw6:gpadmin-[INFO]:-completed successfully with parameters '-c gp_resource_group_cpu_limit -v 0.9' - -! gpconfig -c gp_resource_group_memory_limit -v 0.9; -20170830:00:35:09:440440 gpconfig:sdw6:gpadmin-[INFO]:-completed successfully with parameters '-c gp_resource_group_memory_limit -v 0.9' - -! gpconfig -c gp_resource_manager -v group; -20170830:00:35:10:440522 gpconfig:sdw6:gpadmin-[INFO]:-completed successfully with parameters '-c gp_resource_manager -v group' - -! gpconfig -c max_connections -v 100 -m 40; -20170830:00:35:11:440726 gpconfig:sdw6:gpadmin-[INFO]:-completed successfully with parameters '-c max_connections -v 100 -m 40' - -! 
gpstop -rai; -20170830:00:35:12:440808 gpstop:sdw6:gpadmin-[INFO]:-Starting gpstop with args: -rai -20170830:00:35:12:440808 gpstop:sdw6:gpadmin-[INFO]:-Gathering information and validating the environment... -20170830:00:35:12:440808 gpstop:sdw6:gpadmin-[INFO]:-Obtaining Cloudberry Master catalog information -20170830:00:35:12:440808 gpstop:sdw6:gpadmin-[INFO]:-Obtaining Segment details from master... -20170830:00:35:12:440808 gpstop:sdw6:gpadmin-[INFO]:-Cloudberry Version: 'postgres (Cloudberry Database) 5.0.0-beta.9+dev.45.g52ba809 build dev' -20170830:00:35:12:440808 gpstop:sdw6:gpadmin-[INFO]:-There are 0 connections to the database -20170830:00:35:12:440808 gpstop:sdw6:gpadmin-[INFO]:-Commencing Master instance shutdown with mode='immediate' -20170830:00:35:12:440808 gpstop:sdw6:gpadmin-[INFO]:-Master host=sdw6 -20170830:00:35:12:440808 gpstop:sdw6:gpadmin-[INFO]:-Commencing Master instance shutdown with mode=immediate -20170830:00:35:12:440808 gpstop:sdw6:gpadmin-[INFO]:-Master segment instance directory=/data1/tpz/gpdb/gpAux/gpdemo/datadirs/qddir/demoDataDir-1 -20170830:00:35:13:440808 gpstop:sdw6:gpadmin-[INFO]:-Attempting forceful termination of any leftover master process -20170830:00:35:13:440808 gpstop:sdw6:gpadmin-[INFO]:-Terminating processes for segment /data1/tpz/gpdb/gpAux/gpdemo/datadirs/qddir/demoDataDir-1 -20170830:00:35:13:440808 gpstop:sdw6:gpadmin-[INFO]:-No standby master host configured -20170830:00:35:13:440808 gpstop:sdw6:gpadmin-[INFO]:-Commencing parallel primary segment instance shutdown, please wait... -20170830:00:35:13:440808 gpstop:sdw6:gpadmin-[INFO]:-0.00% of jobs completed -20170830:00:35:23:440808 gpstop:sdw6:gpadmin-[INFO]:-100.00% of jobs completed -20170830:00:35:23:440808 gpstop:sdw6:gpadmin-[INFO]:-Commencing parallel mirror segment instance shutdown, please wait... 
-20170830:00:35:23:440808 gpstop:sdw6:gpadmin-[INFO]:-0.00% of jobs completed -20170830:00:35:33:440808 gpstop:sdw6:gpadmin-[INFO]:-100.00% of jobs completed -20170830:00:35:33:440808 gpstop:sdw6:gpadmin-[INFO]:----------------------------------------------------- -20170830:00:35:33:440808 gpstop:sdw6:gpadmin-[INFO]:- Segments stopped successfully = 6 -20170830:00:35:33:440808 gpstop:sdw6:gpadmin-[INFO]:- Segments with errors during stop = 0 -20170830:00:35:33:440808 gpstop:sdw6:gpadmin-[INFO]:----------------------------------------------------- -20170830:00:35:33:440808 gpstop:sdw6:gpadmin-[INFO]:-Successfully shutdown 6 of 6 segment instances -20170830:00:35:33:440808 gpstop:sdw6:gpadmin-[INFO]:-Database successfully shutdown with no errors reported -20170830:00:35:33:440808 gpstop:sdw6:gpadmin-[INFO]:-Cleaning up leftover shared memory -20170830:00:35:33:440808 gpstop:sdw6:gpadmin-[INFO]:-Restarting System... - --- end_ignore - -show gp_resource_manager; - gp_resource_manager ---------------------- - group -(1 row) -show gp_resource_group_cpu_limit; - gp_resource_group_cpu_limit ------------------------------ - 0.9 -(1 row) -show gp_resource_group_memory_limit; - gp_resource_group_memory_limit --------------------------------- - 0.9 -(1 row) -show max_connections; - max_connections ------------------ - 40 -(1 row) - --- by default admin_group has concurrency set to -1 which leads to --- very small memory quota for each resgroup slot, correct it. 
-ALTER RESOURCE GROUP admin_group SET concurrency 40; -ALTER diff --git a/src/test/isolation2/isolation2_resgroup_schedule b/src/test/isolation2/isolation2_resgroup_schedule index b621d201bc3..f2d315d1361 100644 --- a/src/test/isolation2/isolation2_resgroup_schedule +++ b/src/test/isolation2/isolation2_resgroup_schedule @@ -15,7 +15,7 @@ test: resgroup/resgroup_seg_down_2pc # functions test: resgroup/resgroup_concurrency test: resgroup/resgroup_bypass -#test: resgroup/resgroup_alter_concurrency +test: resgroup/resgroup_alter_concurrency test: resgroup/resgroup_cpu_rate_limit test: resgroup/resgroup_cpuset test: resgroup/resgroup_cpuset_empty_default @@ -26,10 +26,6 @@ test: resgroup/resgroup_move_query test: resgroup/resgroup_recreate test: resgroup/resgroup_functions -# parallel tests -#test: resgroup/restore_default_resgroup -#test: resgroup/resgroup_parallel_queries - # dump info test: resgroup/resgroup_dumpinfo diff --git a/src/test/isolation2/output/resgroup/resgroup_alter_memory.source b/src/test/isolation2/output/resgroup/resgroup_alter_memory.source deleted file mode 100644 index d82f6e1acb6..00000000000 --- a/src/test/isolation2/output/resgroup/resgroup_alter_memory.source +++ /dev/null @@ -1,942 +0,0 @@ --- start_ignore -DROP ROLE IF EXISTS role1_memory_test; -DROP -DROP ROLE IF EXISTS role2_memory_test; -DROP -DROP RESOURCE GROUP rg1_memory_test; -ERROR: resource group "rg1_memory_test" does not exist -DROP RESOURCE GROUP rg2_memory_test; -ERROR: resource group "rg2_memory_test" does not exist --- end_ignore - -CREATE OR REPLACE FUNCTION resGroupPalloc(float) RETURNS int AS '@abs_builddir@/../regress/regress@DLSUFFIX@', 'resGroupPalloc' LANGUAGE C READS SQL DATA; -CREATE - -CREATE OR REPLACE FUNCTION hold_memory_by_percent(int, float) RETURNS int AS $$ SELECT * FROM resGroupPalloc($2) $$ LANGUAGE sql; -CREATE - -CREATE OR REPLACE FUNCTION hold_memory_by_percent_on_qe(int, float) RETURNS int AS $$ SELECT resGroupPalloc($2) FROM gp_dist_random('gp_id') $$ 
LANGUAGE sql; -CREATE - --- After a 'q' command the client connection is disconnected but the --- QD may still be alive, if we then query pg_stat_activity quick enough --- we might still see this session with query ''. --- A filter is put to filter out this kind of quitted sessions. -CREATE OR REPLACE VIEW rg_activity_status AS SELECT rsgname, wait_event_type, state, query FROM pg_stat_activity WHERE rsgname in ('rg1_memory_test', 'rg2_memory_test') AND query <> '' ORDER BY sess_id; -CREATE - -CREATE OR REPLACE VIEW rg_mem_status AS SELECT groupname, memory_limit, memory_shared_quota FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg1_memory_test' OR groupname='rg2_memory_test' ORDER BY groupid; -CREATE - -CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=60, memory_shared_quota=0, memory_spill_ratio=5); -CREATE -CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; -CREATE - --- --- 1.1) alter memory shared quota with low memory usage --- - -ALTER RESOURCE GROUP rg1_memory_test SET CONCURRENCY 2; -ALTER -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 60; -ALTER -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 50; -ALTER - -SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 60 | 50 -(1 row) - -1: SET ROLE TO role1_memory_test; -SET -1: BEGIN; -BEGIN -1: SELECT hold_memory_by_percent(1,0.1); - hold_memory_by_percent ------------------------- - 0 -(1 row) --- proc 1 gets a quota of 60%*50%/2=15% --- it has consumed 60%*10%=6% --- the group has 60%*50%-15%=15% free quota and 60%*50%=30% free shared quota - -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 20; -ALTER - --- now the group has 60%*80%-15%=33% free quota and 60%*20%=12% free shared quota, --- so memory_shared_quota shall be the new value. 
-SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 60 | 20 -(1 row) - -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 70; -ALTER - --- now the group has 60%*30%-15%=3% free quota and 60%*70%=42% free shared quota, --- so memory_shared_quota shall be the new value. -SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 60 | 70 -(1 row) - --- --- 1.2) alter memory shared quota with high memory usage --- - -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 80; -ALTER - --- now the group has 60%*20%-15%=-3% free quota and 60%*80%=48% free shared quota, --- so memory_shared_quota shall be the old value. -SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 60 | 80 -(1 row) - -1q: ... 
- --- --- 1.3) alter memory shared quota up and down --- - -ALTER RESOURCE GROUP rg1_memory_test SET CONCURRENCY 2; -ALTER -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 40; -ALTER -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 60; -ALTER - -SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 40 | 60 -(1 row) - -1: SET ROLE TO role1_memory_test; -SET -1: BEGIN; -BEGIN --- proc1 has a quota of 40%*40%/2=8% --- rg1 still have 8% free quota - -ALTER RESOURCE GROUP rg1_memory_test SET CONCURRENCY 4; -ALTER -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 70; -ALTER --- rg1 should free some quota, 40%*40%/2*1-40%*30%/4*3=8%-9%=-1% --- rg1 now have 40%*20%=8% free quota --- each slot in rg1 requires 40%*30%/4=3% - -2: SET ROLE TO role1_memory_test; -SET -2: BEGIN; -BEGIN -3: SET ROLE TO role1_memory_test; -SET -3: BEGIN; -BEGIN --- proc2&proc3 each requires a quota of 40%*30%/4=3% --- rg1 now has 8%-3%*2=2% free quota - -4: SET ROLE TO role1_memory_test; -SET -4&: BEGIN; --- proc4 shall be pending - -SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 40 | 70 -(1 row) -SELECT * FROM rg_activity_status; - rsgname | wait_event_type | state | query ------------------+-----------------+---------------------+-------- - rg1_memory_test | Client | idle in transaction | BEGIN; - rg1_memory_test | Client | idle in transaction | BEGIN; - rg1_memory_test | Client | idle in transaction | BEGIN; - rg1_memory_test | ResourceGroup | active | BEGIN; -(4 rows) - -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 40; -ALTER --- rg1 now have 40%*60%-8%-3%*2=10% free quota again --- and now proc4 requires a quota of 40%*60%/4=6%, --- so it shall be waken up - -4<: <... 
completed> -BEGIN -SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 40 | 40 -(1 row) -SELECT * FROM rg_activity_status; - rsgname | wait_event_type | state | query ------------------+-----------------+---------------------+-------- - rg1_memory_test | Client | idle in transaction | BEGIN; - rg1_memory_test | Client | idle in transaction | BEGIN; - rg1_memory_test | Client | idle in transaction | BEGIN; - rg1_memory_test | Client | idle in transaction | BEGIN; -(4 rows) - -1q: ... -2q: ... -3q: ... -4q: ... - --- --- 2.1) alter memory limit with low memory usage (and low memory shared usage) --- - -ALTER RESOURCE GROUP rg1_memory_test SET CONCURRENCY 2; -ALTER -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 50; -ALTER -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 60; -ALTER - -SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 50 | 60 -(1 row) - -1: SET ROLE TO role1_memory_test; -SET -1: BEGIN; -BEGIN -1: SELECT hold_memory_by_percent(1,0.1); - hold_memory_by_percent ------------------------- - 0 -(1 row) --- proc 1 gets a quota of 50%*40%/2=10% --- it has consumed 50%*10%=5% --- the group has 50%*40%-10%=10% free quota and 50%*60%=30% free shared quota - -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 60; -ALTER - --- now the group has 60%*40%-10%=14% free quota and 60%*60%=36% free shared quota, --- so memory_limit can be the new value, however at the moment we don't update --- value when increasing memory_limit, so it's still the old value. 
-SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 60 | 60 -(1 row) - -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 40; -ALTER - --- now the group has 40%*40%-10%=6% free quota and 40%*60%=24% free shared quota, --- so memory_limit shall be the new value. -SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 40 | 60 -(1 row) - --- --- 2.2) alter memory limit with high memory usage and low memory shared usage --- - -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 10; -ALTER - --- now the group has 10%*40%-10%=-6% free quota and 10%*60%=6% free shared quota, --- so memory_limit shall be the old value. -SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 10 | 60 -(1 row) - --- --- 2.3) alter memory limit with high memory usage and high memory shared usage --- - -ALTER RESOURCE GROUP rg1_memory_test SET CONCURRENCY 2; -ALTER -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 40; -ALTER -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 60; -ALTER - --- now the group has 40%*40%-10%=6% free quota and 40%*60%=24% free shared quota, -SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 40 | 60 -(1 row) - -1: SELECT hold_memory_by_percent(1,0.5); - hold_memory_by_percent ------------------------- - 0 -(1 row) --- proc 1 has consumed another 50%*50%=25%, in total 30% --- now it has consumed all its 10% quota, as well as 20% shared quota --- now the group has 40%*40%-10%=6% free quota and 40%*60%-20%=4% free shared quota, - -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 20; -ALTER - --- now the group has 40%*80%-10%=22% free quota 
and 40%*20%-20%=-12% free shared quota, --- so memory_shared_quota shall be the old value. -SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 40 | 20 -(1 row) - -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 30; -ALTER - --- now the group has 30%*80%-10%=14% free quota and 30%*20%-20%=-14% free shared quota, --- so memory_limit shall be the old value. -SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 30 | 20 -(1 row) - -1q: ... - --- --- 3.1) decrease one group and increase another, no load --- - -ALTER RESOURCE GROUP rg1_memory_test SET CONCURRENCY 3; -ALTER -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 30; -ALTER -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 0; -ALTER - -CREATE RESOURCE GROUP rg2_memory_test WITH (concurrency=3, cpu_hard_quota_limit=10, memory_limit=30, memory_shared_quota=0, memory_spill_ratio=5); -CREATE -CREATE ROLE role2_memory_test RESOURCE GROUP rg2_memory_test; -CREATE - --- default_group and admin_group consumed 40% memory_limit, --- so with rg1+rg2=60% all memory_limit is already allocated, --- so increasing any of them shall fail. -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 31; -ERROR: total memory_limit exceeded the limit of 100 - -SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 30 | 0 - rg2_memory_test | 30 | 0 -(2 rows) - --- but increase could succeed if another rg is first decreased. 
-ALTER RESOURCE GROUP rg2_memory_test SET MEMORY_LIMIT 20; -ALTER -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 40; -ALTER - -SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 40 | 0 - rg2_memory_test | 20 | 0 -(2 rows) - --- --- 3.2) decrease one group and increase another, with load, no pending --- - -ALTER RESOURCE GROUP rg1_memory_test SET CONCURRENCY 3; -ALTER -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 30; -ALTER -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 0; -ALTER - -ALTER RESOURCE GROUP rg2_memory_test SET CONCURRENCY 2; -ALTER -ALTER RESOURCE GROUP rg2_memory_test SET MEMORY_LIMIT 30; -ALTER -ALTER RESOURCE GROUP rg2_memory_test SET MEMORY_SHARED_QUOTA 0; -ALTER - -SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 30 | 0 - rg2_memory_test | 30 | 0 -(2 rows) - -11: SET ROLE TO role1_memory_test; -SET -11: BEGIN; -BEGIN --- proc11 gets a quota of 30%/3=10% from rg1 - -12: SET ROLE TO role1_memory_test; -SET -12: BEGIN; -BEGIN --- proc12 gets a quota of 30%/3=10% from rg1 - -13: SET ROLE TO role1_memory_test; -SET -13: BEGIN; -BEGIN --- proc13 gets a quota of 30%/3=10% from rg1 - --- although all the memory quota is in use, --- it's still allowed to decrease memory_limit, --- in such a case rg2 won't get the new quota until any query in rg1 ends. 
-ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 15; -ALTER -ALTER RESOURCE GROUP rg2_memory_test SET MEMORY_LIMIT 40; -ALTER --- now both rg1 and rg2 still have 30% quota - -21: SET ROLE TO role2_memory_test; -SET -21: BEGIN; -BEGIN --- proc21 gets a quota of 40%/2=20% from rg2 - -SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 15 | 0 - rg2_memory_test | 40 | 0 -(2 rows) -SELECT * FROM rg_activity_status; - rsgname | wait_event_type | state | query ------------------+-----------------+---------------------+-------- - rg1_memory_test | Client | idle in transaction | BEGIN; - rg1_memory_test | Client | idle in transaction | BEGIN; - rg1_memory_test | Client | idle in transaction | BEGIN; - rg2_memory_test | Client | idle in transaction | BEGIN; -(4 rows) - -11q: ... --- proc11 ends, 10%-5%=5% quota is returned to sys - -12q: ... --- proc12 ends, 10%-5%=5% quota is returned to sys - -SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 15 | 0 - rg2_memory_test | 40 | 0 -(2 rows) -SELECT * FROM rg_activity_status; - rsgname | wait_event_type | state | query ------------------+-----------------+---------------------+-------- - rg1_memory_test | Client | idle in transaction | BEGIN; - rg2_memory_test | Client | idle in transaction | BEGIN; -(2 rows) - --- now rg2 shall be able to get 10% free quota from sys -22: SET ROLE TO role2_memory_test; -SET -22: BEGIN; -BEGIN --- proc22 gets a quota of 40%/2=20% from rg2 - -SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 15 | 0 - rg2_memory_test | 40 | 0 -(2 rows) -SELECT * FROM rg_activity_status; - rsgname | wait_event_type | state | query ------------------+-----------------+---------------------+-------- - 
rg1_memory_test | Client | idle in transaction | BEGIN; - rg2_memory_test | Client | idle in transaction | BEGIN; - rg2_memory_test | Client | idle in transaction | BEGIN; -(3 rows) - -13q: ... -21q: ... -22q: ... - --- --- 3.3) decrease one group and increase another, with load, with pending, --- memory_shared_quota is 0, --- waken up by released quota memory from other group --- - -ALTER RESOURCE GROUP rg2_memory_test SET CONCURRENCY 2; -ALTER -ALTER RESOURCE GROUP rg2_memory_test SET MEMORY_LIMIT 30; -ALTER -ALTER RESOURCE GROUP rg2_memory_test SET MEMORY_SHARED_QUOTA 0; -ALTER - -ALTER RESOURCE GROUP rg1_memory_test SET CONCURRENCY 3; -ALTER -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 30; -ALTER -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 0; -ALTER - -SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 30 | 0 - rg2_memory_test | 30 | 0 -(2 rows) - -11: SET ROLE TO role1_memory_test; -SET -11: BEGIN; -BEGIN --- proc11 gets a quota of 30%/3=10% from rg1 - -12: SET ROLE TO role1_memory_test; -SET -12: BEGIN; -BEGIN --- proc12 gets a quota of 30%/3=10% from rg1 - -13: SET ROLE TO role1_memory_test; -SET -13: BEGIN; -BEGIN --- proc13 gets a quota of 30%/3=10% from rg1 - --- although all the memory quota is in use, --- it's still allowed to decrease memory_limit, --- in such a case rg2 won't get the new quota until any query in rg1 ends. -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 15; -ALTER -ALTER RESOURCE GROUP rg2_memory_test SET MEMORY_LIMIT 40; -ALTER --- now both rg1 and rg2 still have 30% quota - -21: SET ROLE TO role2_memory_test; -SET -21: BEGIN; -BEGIN - -22: SET ROLE TO role2_memory_test; -SET -22&: BEGIN; - --- proc21 gets a quota of 40%/2=20% from rg2 --- proc22 requires a quota of 40%/2=20% from rg2, --- but as rg2 only has 30%-20%=10% free quota now, --- it shall be pending. 
-SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 15 | 0 - rg2_memory_test | 40 | 0 -(2 rows) -SELECT * FROM rg_activity_status; - rsgname | wait_event_type | state | query ------------------+-----------------+---------------------+-------- - rg1_memory_test | Client | idle in transaction | BEGIN; - rg1_memory_test | Client | idle in transaction | BEGIN; - rg1_memory_test | Client | idle in transaction | BEGIN; - rg2_memory_test | Client | idle in transaction | BEGIN; - rg2_memory_test | ResourceGroup | active | BEGIN; -(5 rows) - -11: END; -END -11q: ... --- proc11 ends, 10%-5%=5% quota is returned to sys - -SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 15 | 0 - rg2_memory_test | 40 | 0 -(2 rows) -SELECT * FROM rg_activity_status; - rsgname | wait_event_type | state | query ------------------+-----------------+---------------------+-------- - rg1_memory_test | Client | idle in transaction | BEGIN; - rg1_memory_test | Client | idle in transaction | BEGIN; - rg2_memory_test | Client | idle in transaction | BEGIN; - rg2_memory_test | ResourceGroup | active | BEGIN; -(4 rows) - -12: END; -END -12q: ... --- proc12 ends, 10%-5%=5% quota is returned to sys - --- now rg2 can get 10% free quota from sys --- so proc22 can get enough quota and get executed -22<: <... 
completed> -BEGIN -SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 15 | 0 - rg2_memory_test | 40 | 0 -(2 rows) -SELECT * FROM rg_activity_status; - rsgname | wait_event_type | state | query ------------------+-----------------+---------------------+-------- - rg1_memory_test | Client | idle in transaction | BEGIN; - rg2_memory_test | Client | idle in transaction | BEGIN; - rg2_memory_test | Client | idle in transaction | BEGIN; -(3 rows) - -13q: ... -21q: ... -22q: ... - --- --- 3.4) decrease one group and increase another, with load, with pending, --- memory_shared_quota > 0 and can be freed, --- waken up by released shared quota memory from other group --- - -ALTER RESOURCE GROUP rg2_memory_test SET CONCURRENCY 2; -ALTER -ALTER RESOURCE GROUP rg2_memory_test SET MEMORY_LIMIT 30; -ALTER -ALTER RESOURCE GROUP rg2_memory_test SET MEMORY_SHARED_QUOTA 0; -ALTER - -ALTER RESOURCE GROUP rg1_memory_test SET CONCURRENCY 1; -ALTER -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 30; -ALTER -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 60; -ALTER - -SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 30 | 60 - rg2_memory_test | 30 | 0 -(2 rows) - -11: SET ROLE TO role1_memory_test; -SET -11: BEGIN; -BEGIN --- proc11 gets a quota of 30%*40%=12% from rg1 --- rg1 also has a shared quota of 30%*60%=18% - -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 20; -ALTER --- now each slot in rg1 requires a quota of 20%*40%=8% --- rg1 has 0% free quota and 20%*60%=12% free shared quota --- rg1 should release some shared quota, 30%*60%-20%*60%=6% - -ALTER RESOURCE GROUP rg2_memory_test SET CONCURRENCY 4; -ALTER -ALTER RESOURCE GROUP rg2_memory_test SET MEMORY_LIMIT 40; -ALTER --- now rg2 has a quota of 30%+6%=36% --- now each slot in rg2 requires a quota 
of 40%/4=10% - -21: SET ROLE TO role2_memory_test; -SET -21: BEGIN; -BEGIN -22: SET ROLE TO role2_memory_test; -SET -22: BEGIN; -BEGIN -23: SET ROLE TO role2_memory_test; -SET -23: BEGIN; -BEGIN --- proc21~proc23 each gets a quota of 40%/4=10% --- rg2 still has 36%-10%*3=6% free quota - -24: SET ROLE TO role2_memory_test; -SET -24&: BEGIN; --- proc24 shall be pending. - -SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 20 | 60 - rg2_memory_test | 40 | 0 -(2 rows) -SELECT * FROM rg_activity_status; - rsgname | wait_event_type | state | query ------------------+-----------------+---------------------+-------- - rg1_memory_test | Client | idle in transaction | BEGIN; - rg2_memory_test | Client | idle in transaction | BEGIN; - rg2_memory_test | Client | idle in transaction | BEGIN; - rg2_memory_test | Client | idle in transaction | BEGIN; - rg2_memory_test | ResourceGroup | active | BEGIN; -(5 rows) - -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 30; -ALTER --- now rg1 should release some shared quota, 20%*60%-20%*30%=6% --- now rg2 can get at most 6% new quota, but as it already has 36%, --- so rg2 actually gets 4% new quota. --- now rg2 has 40% quota, the free quota is 40%-30%=10%, --- just enough for proc24 to wake up. - -24<: <... 
completed> -BEGIN -SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 20 | 30 - rg2_memory_test | 40 | 0 -(2 rows) -SELECT * FROM rg_activity_status; - rsgname | wait_event_type | state | query ------------------+-----------------+---------------------+-------- - rg1_memory_test | Client | idle in transaction | BEGIN; - rg2_memory_test | Client | idle in transaction | BEGIN; - rg2_memory_test | Client | idle in transaction | BEGIN; - rg2_memory_test | Client | idle in transaction | BEGIN; - rg2_memory_test | Client | idle in transaction | BEGIN; -(5 rows) - -11q: ... -21q: ... -22q: ... -23q: ... -24q: ... - --- --- 3.5) decrease one group and increase another, with load, with pending --- memory_shared_quota > 0 and can not be freed, --- waken up by released quota memory from other group --- - -ALTER RESOURCE GROUP rg2_memory_test SET CONCURRENCY 2; -ALTER -ALTER RESOURCE GROUP rg2_memory_test SET MEMORY_LIMIT 30; -ALTER -ALTER RESOURCE GROUP rg2_memory_test SET MEMORY_SHARED_QUOTA 0; -ALTER - -ALTER RESOURCE GROUP rg1_memory_test SET CONCURRENCY 10; -ALTER -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 30; -ALTER -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 90; -ALTER - -SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 30 | 90 - rg2_memory_test | 30 | 0 -(2 rows) - -11: SET ROLE TO role1_memory_test; -SET -11: BEGIN; -BEGIN -11: SELECT hold_memory_by_percent(1,0.90); - hold_memory_by_percent ------------------------- - 0 -(1 row) --- proc11 gets a quota of 30%*10%/10=0.3% from rg1 --- rg1 has a free quota of 30%*10%-0.3%=2.7% --- rg1 has a shared quota of 30%*90%=27%, --- free shared quota is 27%-(30%*90%-0.3%)=0.3% - -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 20; -ALTER --- now each slot in rg1 requires a quota of 
20%*10%/10=0.2% --- rg1 releases some quota, 0.1%*9=0.9%, --- so new quota is 2.1%, new free quota is 2.1%-0.3%=1.8% --- rg1 releases some shared quota, 27%-max(20%*90%,26.7%)=0.3%, --- so new shared quota is 26.7%, new free shared quota is 0% - -ALTER RESOURCE GROUP rg2_memory_test SET CONCURRENCY 4; -ALTER -ALTER RESOURCE GROUP rg2_memory_test SET MEMORY_LIMIT 40; -ALTER --- now rg2 has a quota of 30%+1.2%=31.2% --- now each slot in rg2 requires a quota of 40%/4=10% - -21: SET ROLE TO role2_memory_test; -SET -21: BEGIN; -BEGIN -22: SET ROLE TO role2_memory_test; -SET -22: BEGIN; -BEGIN -23: SET ROLE TO role2_memory_test; -SET -23: BEGIN; -BEGIN --- proc21~proc23 each gets a quota of 40%/4=10% --- rg2 still has 31.2%-10%*3=1.2% free quota - -24: SET ROLE TO role2_memory_test; -SET -24&: BEGIN; --- proc24 shall be pending. - -SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 20 | 90 - rg2_memory_test | 40 | 0 -(2 rows) -SELECT * FROM rg_activity_status; - rsgname | wait_event_type | state | query ------------------+-----------------+---------------------+---------------------------------------- - rg1_memory_test | Client | idle in transaction | SELECT hold_memory_by_percent(1,0.90); - rg2_memory_test | Client | idle in transaction | BEGIN; - rg2_memory_test | Client | idle in transaction | BEGIN; - rg2_memory_test | Client | idle in transaction | BEGIN; - rg2_memory_test | ResourceGroup | active | BEGIN; -(5 rows) - -ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_SHARED_QUOTA 30; -ALTER --- rg1 can't free any shared quota as all of them are in use by proc11 - -SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 20 | 30 - rg2_memory_test | 40 | 0 -(2 rows) -SELECT * FROM rg_activity_status; - rsgname | wait_event_type | state | query 
------------------+-----------------+---------------------+---------------------------------------- - rg1_memory_test | Client | idle in transaction | SELECT hold_memory_by_percent(1,0.90); - rg2_memory_test | Client | idle in transaction | BEGIN; - rg2_memory_test | Client | idle in transaction | BEGIN; - rg2_memory_test | Client | idle in transaction | BEGIN; - rg2_memory_test | ResourceGroup | active | BEGIN; -(5 rows) - -11q: ... --- rg1 releases 0.3%-0.2%=0.1% quota and 26.7%-18%=8.7% --- so rg2 gets 8.8% new quota --- now rg2 has 40% quota, free quota is 10% --- so proc24 shall be waken up - -24<: <... completed> -BEGIN -SELECT * FROM rg_mem_status; - groupname | memory_limit | memory_shared_quota ------------------+--------------+--------------------- - rg1_memory_test | 20 | 30 - rg2_memory_test | 40 | 0 -(2 rows) -SELECT * FROM rg_activity_status; - rsgname | wait_event_type | state | query ------------------+-----------------+---------------------+-------- - rg2_memory_test | Client | idle in transaction | BEGIN; - rg2_memory_test | Client | idle in transaction | BEGIN; - rg2_memory_test | Client | idle in transaction | BEGIN; - rg2_memory_test | Client | idle in transaction | BEGIN; -(4 rows) - -21q: ... -22q: ... -23q: ... -24q: ... 
- --- cleanup -DROP VIEW rg_mem_status; -DROP -DROP ROLE role1_memory_test; -DROP -DROP ROLE role2_memory_test; -DROP -DROP RESOURCE GROUP rg1_memory_test; -DROP -DROP RESOURCE GROUP rg2_memory_test; -DROP - --- --- Test PrepareTransaction report an error --- -CREATE RESOURCE GROUP rg_test_group WITH (cpu_hard_quota_limit=5, memory_limit=5); -CREATE -CREATE ROLE rg_test_role RESOURCE GROUP rg_test_group; -CREATE - -SET debug_dtm_action = "fail_begin_command"; -SET -SET debug_dtm_action_target = "protocol"; -SET -SET debug_dtm_action_protocol = "prepare"; -SET -SET debug_dtm_action_segment = 0; -SET - --- ALTER should fail and the memory_limit in both catalog and share memory are --- still 5% -ALTER RESOURCE GROUP rg_test_group set memory_limit 1; -ERROR: Raise ERROR for debug_dtm_action = 2, debug_dtm_action_protocol = Distributed Prepare - -RESET debug_dtm_action; -RESET -RESET debug_dtm_action_target; -RESET -RESET debug_dtm_action_protocol; -RESET -RESET debug_dtm_action_segment; -RESET - --- should still be 5% on both QD and QE -select memory_limit from gp_toolkit.gp_resgroup_config where groupname = 'rg_test_group'; - memory_limit --------------- - 5 -(1 row) - --- --- Test error happen on commit_prepare, DDL success after retry --- -SET debug_dtm_action = "fail_begin_command"; -SET -SET debug_dtm_action_target = "protocol"; -SET -SET debug_dtm_action_protocol = "commit_prepared"; -SET -SET debug_dtm_action_segment = 0; -SET - --- ALTER should success -ALTER RESOURCE GROUP rg_test_group set memory_limit 4; -ALTER - -RESET debug_dtm_action; -RESET -RESET debug_dtm_action_target; -RESET -RESET debug_dtm_action_protocol; -RESET -RESET debug_dtm_action_segment; -RESET - --- should still be 4% on both QD and QE -select memory_limit from gp_toolkit.gp_resgroup_config where groupname = 'rg_test_group'; - memory_limit --------------- - 4 -(1 row) - -DROP ROLE rg_test_role; -DROP -DROP RESOURCE GROUP rg_test_group; -DROP diff --git 
a/src/test/isolation2/output/resgroup/resgroup_memory_limit.source b/src/test/isolation2/output/resgroup/resgroup_memory_limit.source deleted file mode 100644 index 638c322eea0..00000000000 --- a/src/test/isolation2/output/resgroup/resgroup_memory_limit.source +++ /dev/null @@ -1,923 +0,0 @@ --- start_ignore -DROP ROLE IF EXISTS role1_memory_test; -DROP -DROP RESOURCE GROUP rg1_memory_test; -ERROR: resource group "rg1_memory_test" does not exist -DROP RESOURCE GROUP rg2_memory_test; -ERROR: resource group "rg2_memory_test" does not exist --- end_ignore - -CREATE OR REPLACE FUNCTION resGroupPalloc(float) RETURNS int AS '@abs_builddir@/../regress/regress@DLSUFFIX@', 'resGroupPalloc' LANGUAGE C READS SQL DATA; -CREATE - -CREATE OR REPLACE FUNCTION hold_memory_by_percent(float) RETURNS int AS $$ SELECT * FROM resGroupPalloc($1) $$ LANGUAGE sql; -CREATE - -CREATE OR REPLACE VIEW rg_mem_status AS SELECT groupname, memory_limit, memory_shared_quota FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg1_memory_test' OR groupname='rg2_memory_test' ORDER BY groupid; -CREATE - -CREATE OR REPLACE VIEW memory_result AS SELECT rsgname, memory_usage from gp_toolkit.gp_resgroup_status; -CREATE - --- 1) single allocation --- Group Share Quota = 0 --- Global Share Quota > 0 --- Slot Quota > 0 --- ----------------------- - --- we assume system total chunks is 100% --- rg1's expected: 100% * 52% => 52% --- rg1's slot quota: 52% / 2 * 2 => 52% --- rg1's single slot quota: 52% / 2 => 26% --- rg1's shared quota: %52 - %52 => %0 --- system free chunks: 100% - 10% - 30% - 52% => 8% --- memory available to one slot in rg1: 52%/2 + 0% + 8% => 34% -CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=52, memory_shared_quota=0); -CREATE -CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; -CREATE - --- 1a) on QD -1: SET ROLE TO role1_memory_test; -SET -1: SELECT hold_memory_by_percent(0.14 / 0.52); - hold_memory_by_percent 
------------------------- - 0 -(1 row) -1: SELECT hold_memory_by_percent(0.14 / 0.52); - hold_memory_by_percent ------------------------- - 0 -(1 row) -1: SELECT hold_memory_by_percent(0.14 / 0.52); -ERROR: Out of memory -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory_by_percent" statement 1 -1q: ... - -1: SET ROLE TO role1_memory_test; -SET -1: BEGIN; -BEGIN -1: SELECT hold_memory_by_percent(0.14 / 0.52); - hold_memory_by_percent ------------------------- - 0 -(1 row) -1: SELECT hold_memory_by_percent(0.14 / 0.52); - hold_memory_by_percent ------------------------- - 0 -(1 row) -1: SELECT hold_memory_by_percent(0.14 / 0.52); -ERROR: Out of memory -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory_by_percent" statement 1 -1q: ... - -1: SET ROLE TO role1_memory_test; -SET -1: SELECT hold_memory_by_percent(0.42 / 0.52); -ERROR: Out of memory -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory_by_percent" statement 1 -1q: ... - --- 1b) on QEs -1: SET ROLE TO role1_memory_test; -SET -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.14 / 0.52)=0; - count -------- - 0 -(1 row) -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.14 / 0.52)=0; - count -------- - 0 -(1 row) -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.14 / 0.52)=0; -ERROR: Out of memory (seg0 slice1 10.152.10.56:25432 pid=18610) -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory_by_percent" statement 1 -1q: ... - -1: SET ROLE TO role1_memory_test; -SET -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.42 / 0.52)=0; -ERROR: Out of memory (seg0 slice1 10.152.10.56:25432 pid=18619) -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory_by_percent" statement 1 -1q: ... 
- -DROP ROLE role1_memory_test; -DROP -DROP RESOURCE GROUP rg1_memory_test; -DROP - --- 2) single allocation --- Group Share Quota > 0 --- Global Share Quota > 0 --- Slot Quota > 0 --- ----------------------- - --- we assume system total chunks is 100% --- rg1's expected: 100% * 52 / 100 => 52% --- rg1's slot quota: 52% * 60 /100 => 31% --- rg1's single slot quota: 31% / 2 => 15.5% --- rg1's shared quota: 52% - 31% => 21% --- system free chunks: 100% - 10% - 30% - 52% => 8% --- memory available to one slot in rg1: 15.5% + 21% + 8% => 44.5% - -CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=52, memory_shared_quota=40); -CREATE -CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; -CREATE - --- 2a) on QD -1: SET ROLE TO role1_memory_test; -SET -1: SELECT hold_memory_by_percent(0.12 / 0.52); - hold_memory_by_percent ------------------------- - 0 -(1 row) -1: SELECT hold_memory_by_percent(0.12 / 0.52); - hold_memory_by_percent ------------------------- - 0 -(1 row) -1: SELECT hold_memory_by_percent(0.12 / 0.52); - hold_memory_by_percent ------------------------- - 0 -(1 row) -1: SELECT hold_memory_by_percent(0.12 / 0.52); -ERROR: Out of memory -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory_by_percent" statement 1 -1q: ... - -1: SET ROLE TO role1_memory_test; -SET -1: BEGIN; -BEGIN -1: SELECT hold_memory_by_percent(0.12 / 0.52); - hold_memory_by_percent ------------------------- - 0 -(1 row) -1: SELECT hold_memory_by_percent(0.12 / 0.52); - hold_memory_by_percent ------------------------- - 0 -(1 row) -1: SELECT hold_memory_by_percent(0.12 / 0.52); - hold_memory_by_percent ------------------------- - 0 -(1 row) -1: SELECT hold_memory_by_percent(0.12 / 0.52); -ERROR: Out of memory -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory_by_percent" statement 1 -1q: ... 
- -1: SET ROLE TO role1_memory_test; -SET -1: SELECT hold_memory_by_percent(0.48 / 0.52); -ERROR: Out of memory -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory_by_percent" statement 1 -1q: ... - --- 2b) on QEs -1: SET ROLE TO role1_memory_test; -SET -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.12 / 0.52)=0; - count -------- - 0 -(1 row) -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.12 / 0.52)=0; - count -------- - 0 -(1 row) -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.12 / 0.52)=0; - count -------- - 0 -(1 row) -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.12 / 0.52)=0; -ERROR: Out of memory (seg0 slice1 10.152.10.56:25432 pid=19259) -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory_by_percent" statement 1 -1q: ... - -1: SET ROLE TO role1_memory_test; -SET -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.48 / 0.52)=0; -ERROR: Out of memory (seg0 slice1 10.152.10.56:25432 pid=19269) -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory_by_percent" statement 1 -1q: ... 
- -DROP ROLE role1_memory_test; -DROP -DROP RESOURCE GROUP rg1_memory_test; -DROP - --- 3) single allocation --- Group Share Quota > 0 --- Global Share Quota > 0 --- Slot Quota = 0 --- ----------------------- - --- we assume system total chunks is 100% --- rg1's expected: 100% * 52 / 100 => 52% --- rg1's slot quota: 0 --- rg1's shared quota: 52% --- system free chunks: 100% - 10% - 30% - 52% => 8% --- memory available to one slot in rg1: 52% + 8% => 60% - -CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=52, memory_shared_quota=100); -CREATE -CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; -CREATE - --- 3a) on QD -1: SET ROLE TO role1_memory_test; -SET -1: SELECT hold_memory_by_percent(0.25 / 0.52); - hold_memory_by_percent ------------------------- - 0 -(1 row) -1: SELECT hold_memory_by_percent(0.25 / 0.52); - hold_memory_by_percent ------------------------- - 0 -(1 row) -1: SELECT hold_memory_by_percent(0.25 / 0.52); -ERROR: Out of memory -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory_by_percent" statement 1 -1q: ... - -1: SET ROLE TO role1_memory_test; -SET -1: BEGIN; -BEGIN -1: SELECT hold_memory_by_percent(0.25 / 0.52); - hold_memory_by_percent ------------------------- - 0 -(1 row) -1: SELECT hold_memory_by_percent(0.25 / 0.52); - hold_memory_by_percent ------------------------- - 0 -(1 row) -1: SELECT hold_memory_by_percent(0.25 / 0.52); -ERROR: Out of memory -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory_by_percent" statement 1 -1q: ... - -1: SET ROLE TO role1_memory_test; -SET -1: SELECT hold_memory_by_percent(0.75 / 0.52); -ERROR: Out of memory -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory_by_percent" statement 1 -1q: ... 
- --- 3b) on QEs -1: SET ROLE TO role1_memory_test; -SET -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.25 / 0.52)=0; - count -------- - 0 -(1 row) -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.25 / 0.52)=0; - count -------- - 0 -(1 row) -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.25 / 0.52)=0; -ERROR: Out of memory (seg0 slice1 10.152.10.56:25432 pid=19875) -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory_by_percent" statement 1 -1q: ... - -1: SET ROLE TO role1_memory_test; -SET -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.75 / 0.52)=0; -ERROR: Out of memory (seg0 slice1 10.152.10.56:25432 pid=19884) -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory_by_percent" statement 1 -1q: ... - -DROP ROLE role1_memory_test; -DROP -DROP RESOURCE GROUP rg1_memory_test; -DROP - --- 4) multi allocation in one group --- Group Share Quota = 0 --- Global Share Quota > 0 --- Slot Quota > 0 --- ----------------------- - --- we assume system total chunks is 100% --- rg1's expected: 100% * 52 / 100 => 52% --- rg1's slot quota: 52% / 2 * 2 => 52% --- rg1's single slot quota: 52% / 2 => 26% --- rg1's shared quota: 0 --- system free chunks: 100% - 10% - 30% - 52% => 8% --- memory available to one slot in rg1: 26% + 8% => 34% - -CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=52, memory_shared_quota=0); -CREATE -CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; -CREATE - --- 4a) on QD --- not exceed the global share -1: SET ROLE TO role1_memory_test; -SET -2: SET ROLE TO role1_memory_test; -SET -1: BEGIN; -BEGIN -2: BEGIN; -BEGIN -1: SELECT hold_memory_by_percent(0.28 / 0.52); - hold_memory_by_percent ------------------------- - 0 -(1 row) -2: SELECT hold_memory_by_percent(0.28 / 0.52); - 
hold_memory_by_percent ------------------------- - 0 -(1 row) -1q: ... -2q: ... - --- exceed the global share -1: SET ROLE TO role1_memory_test; -SET -2: SET ROLE TO role1_memory_test; -SET -1: BEGIN; -BEGIN -2: BEGIN; -BEGIN -1: SELECT hold_memory_by_percent(0.32 / 0.52); - hold_memory_by_percent ------------------------- - 0 -(1 row) -2: SELECT hold_memory_by_percent(0.32 / 0.52); -ERROR: Out of memory -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory_by_percent" statement 1 -1q: ... -2q: ... - --- allocate serially -1: SET ROLE TO role1_memory_test; -SET -2: SET ROLE TO role1_memory_test; -SET -1: BEGIN; -BEGIN -2: BEGIN; -BEGIN -1: SELECT hold_memory_by_percent(0.32 / 0.52); - hold_memory_by_percent ------------------------- - 0 -(1 row) -1q: ... -SELECT pg_sleep(1); - pg_sleep ----------- - -(1 row) -2: SELECT hold_memory_by_percent(0.32 / 0.52); - hold_memory_by_percent ------------------------- - 0 -(1 row) -2q: ... - --- 4b) on QEs --- not exceed the global share -1: SET ROLE TO role1_memory_test; -SET -2: SET ROLE TO role1_memory_test; -SET -1: BEGIN; -BEGIN -2: BEGIN; -BEGIN -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.28 / 0.52)=0; - count -------- - 0 -(1 row) -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.28 / 0.52)=0; - count -------- - 0 -(1 row) -1q: ... -2q: ... - --- exceed the global share -1: SET ROLE TO role1_memory_test; -SET -2: SET ROLE TO role1_memory_test; -SET -1: BEGIN; -BEGIN -2: BEGIN; -BEGIN -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.32 / 0.52)=0; - count -------- - 0 -(1 row) -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.32 / 0.52)=0; -ERROR: Out of memory (seg0 slice1 10.152.10.56:25432 pid=21102) -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory_by_percent" statement 1 -1q: ... -2q: ... 
- --- allocate serially -1: SET ROLE TO role1_memory_test; -SET -2: SET ROLE TO role1_memory_test; -SET -1: BEGIN; -BEGIN -2: BEGIN; -BEGIN -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.32 / 0.52)=0; - count -------- - 0 -(1 row) -1q: ... -SELECT pg_sleep(1); - pg_sleep ----------- - -(1 row) -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.32 / 0.52)=0; - count -------- - 0 -(1 row) -2q: ... - -DROP ROLE role1_memory_test; -DROP -DROP RESOURCE GROUP rg1_memory_test; -DROP - --- 5) multi allocation in one group --- Group Share Quota > 0 --- Global Share Quota > 0 --- Slot Quota > 0 --- ----------------------- - --- we assume system total chunks is 100% --- rg1's expected: 100% * 52 / 100 => 52% --- rg1's slot quota: 52% * 50 / 100 => 26% --- rg1's single slot quota: 26% / 2 => 13% --- rg1's shared quota: 52% - 13% * 2 => 26% --- system free chunks: 100% - 10% - 30% - 52% => 8% --- memory available to one slot in rg1: 13% + 26% + 8% => 47% - -CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=52, memory_shared_quota=50); -CREATE -CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; -CREATE - --- 5a) on QD --- not exceed the global share -1: SET ROLE TO role1_memory_test; -SET -2: SET ROLE TO role1_memory_test; -SET -1: BEGIN; -BEGIN -2: BEGIN; -BEGIN --- reserve all the group shared quota -1: SELECT hold_memory_by_percent(0.39 / 0.52); - hold_memory_by_percent ------------------------- - 0 -(1 row) --- must allocate from global share -2: SELECT hold_memory_by_percent(0.2 / 0.52); - hold_memory_by_percent ------------------------- - 0 -(1 row) -1q: ... -2q: ... 
- --- exceed the global share -1: SET ROLE TO role1_memory_test; -SET -2: SET ROLE TO role1_memory_test; -SET -1: BEGIN; -BEGIN -2: BEGIN; -BEGIN -1: SELECT hold_memory_by_percent(0.39 / 0.52); - hold_memory_by_percent ------------------------- - 0 -(1 row) -2: SELECT hold_memory_by_percent(0.39 / 0.52); -ERROR: Out of memory -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory_by_percent" statement 1 -1q: ... -2q: ... - --- allocate serially -1: SET ROLE TO role1_memory_test; -SET -2: SET ROLE TO role1_memory_test; -SET -1: BEGIN; -BEGIN -2: BEGIN; -BEGIN -1: SELECT hold_memory_by_percent(0.39 / 0.52); - hold_memory_by_percent ------------------------- - 0 -(1 row) -1q: ... -SELECT pg_sleep(1); - pg_sleep ----------- - -(1 row) -2: SELECT hold_memory_by_percent(0.39 / 0.52); - hold_memory_by_percent ------------------------- - 0 -(1 row) -2q: ... - --- 5b) on QEs --- not exceed the global share -1: SET ROLE TO role1_memory_test; -SET -2: SET ROLE TO role1_memory_test; -SET -1: BEGIN; -BEGIN -2: BEGIN; -BEGIN --- reserve all the group shared quota -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.39 / 0.52)=0; - count -------- - 0 -(1 row) --- must allocate from global share -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.2 / 0.52)=0; - count -------- - 0 -(1 row) -1q: ... -2q: ... - --- exceed the global share -1: SET ROLE TO role1_memory_test; -SET -2: SET ROLE TO role1_memory_test; -SET -1: BEGIN; -BEGIN -2: BEGIN; -BEGIN -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.39 / 0.52)=0; - count -------- - 0 -(1 row) -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.39 / 0.52)=0; -ERROR: Out of memory (seg0 slice1 10.152.10.56:25432 pid=21783) -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory_by_percent" statement 1 -1q: ... -2q: ... 
- --- allocate serially -1: SET ROLE TO role1_memory_test; -SET -2: SET ROLE TO role1_memory_test; -SET -1: BEGIN; -BEGIN -2: BEGIN; -BEGIN -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.39 / 0.52)=0; - count -------- - 0 -(1 row) -1q: ... -SELECT pg_sleep(1); - pg_sleep ----------- - -(1 row) -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.39 / 0.52)=0; - count -------- - 0 -(1 row) -2q: ... - -DROP ROLE role1_memory_test; -DROP -DROP RESOURCE GROUP rg1_memory_test; -DROP - --- 6) multi allocation in different group --- Group Share Quota > 0 --- Global Share Quota > 0 --- Slot Quota > 0 --- ----------------------- - --- we assume system total chunks is 100% --- rg1's expected: 100% * 20 / 100 => 20% --- rg1's slot quota: 20% * 60 / 100 / 2 * 2 => 12% --- rg1's single slot quota: 12% / 2 => 6% --- rg1's shared quota: 20% - 6% * 2 => 8% --- rg2 same as rg1 --- system free chunks: 100% - 10% - 30% - 100%*20/100 - 100%*20/100 => 20% --- memory available to one slot in rg1/rg2: 6% + 8% + 20% => 34% - -CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=20, memory_shared_quota=40); -CREATE -CREATE RESOURCE GROUP rg2_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=20, memory_shared_quota=40); -CREATE -CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; -CREATE -CREATE ROLE role2_memory_test RESOURCE GROUP rg2_memory_test; -CREATE - --- 6a) on QD --- not exceed the global share -1: SET ROLE TO role1_memory_test; -SET -2: SET ROLE TO role2_memory_test; -SET -1: BEGIN; -BEGIN -2: BEGIN; -BEGIN -1: SELECT hold_memory_by_percent(0.2 / 0.2); - hold_memory_by_percent ------------------------- - 0 -(1 row) -2: SELECT hold_memory_by_percent(0.2 / 0.2); - hold_memory_by_percent ------------------------- - 0 -(1 row) -1q: ... -2q: ... 
- --- exceed the global share -1: SET ROLE TO role1_memory_test; -SET -2: SET ROLE TO role2_memory_test; -SET -1: BEGIN; -BEGIN -2: BEGIN; -BEGIN -1: SELECT hold_memory_by_percent(0.3 / 0.2); - hold_memory_by_percent ------------------------- - 0 -(1 row) -2: SELECT hold_memory_by_percent(0.3 / 0.2); -ERROR: Out of memory -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory_by_percent" statement 1 -1q: ... -2q: ... - --- allocate serially -1: SET ROLE TO role1_memory_test; -SET -2: SET ROLE TO role2_memory_test; -SET -1: BEGIN; -BEGIN -2: BEGIN; -BEGIN -1: SELECT hold_memory_by_percent(0.3 / 0.2); - hold_memory_by_percent ------------------------- - 0 -(1 row) -1q: ... -SELECT pg_sleep(1); - pg_sleep ----------- - -(1 row) -2: SELECT hold_memory_by_percent(0.3 / 0.2); - hold_memory_by_percent ------------------------- - 0 -(1 row) -2q: ... - --- 6b) on QEs --- not exceed the global share -1: SET ROLE TO role1_memory_test; -SET -2: SET ROLE TO role2_memory_test; -SET -1: BEGIN; -BEGIN -2: BEGIN; -BEGIN -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.2 / 0.2)=0; - count -------- - 0 -(1 row) -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.2 / 0.2)=0; - count -------- - 0 -(1 row) -1q: ... -2q: ... - --- exceed the global share -1: SET ROLE TO role1_memory_test; -SET -2: SET ROLE TO role2_memory_test; -SET -1: BEGIN; -BEGIN -2: BEGIN; -BEGIN -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.3 / 0.2)=0; - count -------- - 0 -(1 row) -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.3 / 0.2)=0; -ERROR: Out of memory (seg0 slice1 10.152.10.56:25432 pid=22464) -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory_by_percent" statement 1 -1q: ... -2q: ... 
- --- allocate serially -1: SET ROLE TO role1_memory_test; -SET -2: SET ROLE TO role2_memory_test; -SET -1: BEGIN; -BEGIN -2: BEGIN; -BEGIN -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.3 / 0.2)=0; - count -------- - 0 -(1 row) -1q: ... -SELECT pg_sleep(1); - pg_sleep ----------- - -(1 row) -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.3 / 0.2)=0; - count -------- - 0 -(1 row) -2q: ... - -DROP ROLE role1_memory_test; -DROP -DROP ROLE role2_memory_test; -DROP -DROP RESOURCE GROUP rg1_memory_test; -DROP -DROP RESOURCE GROUP rg2_memory_test; -DROP - --- 7) DBA can increase global shared memory by decreasing --- any existing group_memory_limit --- ----------------------- - --- we assume system total chunks is 100% --- rg1's expected: 100% * 30 / 100 => 30% --- rg1's slot quota: 30% --- rg1's single slot quota: 30% / 2 => 15% --- rg1's shared quota: 0 --- rg2 same as rg1 --- system free chunks: 100% - 10% - 30% - 30% - 30% => 0 --- memory available to one slot in rg1/rg2: 15% + 0 + 0 => 15% - -CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=30, memory_shared_quota=0); -CREATE -CREATE RESOURCE GROUP rg2_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=30, memory_shared_quota=0); -CREATE -CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; -CREATE -CREATE ROLE role2_memory_test RESOURCE GROUP rg2_memory_test; -CREATE - --- 7a) on QD --- not enough memory -1: SET ROLE TO role1_memory_test; -SET -1: SELECT hold_memory_by_percent(0.2 / 0.3); -ERROR: Out of memory -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory_by_percent" statement 1 -1q: ... 
- --- alter rg2 memory_limit so last query has enough memory -ALTER RESOURCE GROUP rg2_memory_test SET memory_limit 20; -ALTER --- system free chunks: 100% - 10% - 30% - 30% - 20% => 10% --- memory available to one slot in rg1/rg2: 15% + 0 + 10% => 25% - --- enough memory for allocating -1: SET ROLE TO role1_memory_test; -SET -1: SELECT hold_memory_by_percent(0.2 / 0.3); - hold_memory_by_percent ------------------------- - 0 -(1 row) -1q: ... - --- 7b) on QEs --- not enough memory -ALTER RESOURCE GROUP rg2_memory_test SET memory_limit 30; -ALTER -1: SET ROLE TO role1_memory_test; -SET -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.2 / 0.3)=0; -ERROR: Out of memory (seg0 slice1 10.152.10.56:25432 pid=23131) -DETAIL: Resource group memory limit reached -CONTEXT: SQL function "hold_memory_by_percent" statement 1 -1q: ... - --- alter rg2 memory_limit so last query has enough memory -ALTER RESOURCE GROUP rg2_memory_test SET memory_limit 20; -ALTER --- system free chunks: 100% - 10% - 30% - 30% - 20% => 10% --- memory available to one slot in rg1/rg2: 15% + 0 + 10% => 25% - --- enough memory for allocating -1: SET ROLE TO role1_memory_test; -SET -1: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.2 / 0.3)=0; - count -------- - 0 -(1 row) -1q: ... 
- -DROP ROLE role1_memory_test; -DROP -DROP ROLE role2_memory_test; -DROP -DROP RESOURCE GROUP rg1_memory_test; -DROP -DROP RESOURCE GROUP rg2_memory_test; -DROP diff --git a/src/test/isolation2/output/resgroup/resgroup_memory_runaway.source b/src/test/isolation2/output/resgroup/resgroup_memory_runaway.source deleted file mode 100644 index 1fef409e1bb..00000000000 --- a/src/test/isolation2/output/resgroup/resgroup_memory_runaway.source +++ /dev/null @@ -1,303 +0,0 @@ --- start_ignore -DROP ROLE IF EXISTS role1_memory_test; -DROP -DROP RESOURCE GROUP rg1_memory_test; -ERROR: resource group "rg1_memory_test" does not exist -DROP RESOURCE GROUP rg2_memory_test; -ERROR: resource group "rg2_memory_test" does not exist --- end_ignore - -CREATE OR REPLACE FUNCTION resGroupPalloc(float) RETURNS int AS '@abs_builddir@/../regress/regress@DLSUFFIX@', 'resGroupPalloc' LANGUAGE C READS SQL DATA; -CREATE - -CREATE OR REPLACE FUNCTION hold_memory_by_percent(float) RETURNS int AS $$ SELECT * FROM resGroupPalloc($1) $$ LANGUAGE sql; -CREATE - -CREATE OR REPLACE VIEW rg_mem_status AS SELECT groupname, memory_limit, memory_shared_quota FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg1_memory_test' OR groupname='rg2_memory_test' ORDER BY groupid; -CREATE - -CREATE OR REPLACE VIEW memory_result AS SELECT rsgname, memory_usage from gp_toolkit.gp_resgroup_status; -CREATE - --- start_ignore -! gpconfig -c runaway_detector_activation_percent -v 50; -20191213:05:03:47:014263 gpconfig:hubert-gp-centos:huanzhang-[INFO]:-completed successfully with parameters '-c runaway_detector_activation_percent -v 50' - -! gpstop -ari; -20191213:05:03:48:014419 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Starting gpstop with args: -ari -20191213:05:03:48:014419 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Gathering information and validating the environment... 
-20191213:05:03:48:014419 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Obtaining Cloudberry Master catalog information -20191213:05:03:48:014419 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Obtaining Segment details from master... -20191213:05:03:48:014419 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Cloudberry Version: 'postgres (Cloudberry Database) 7.0.0-alpha.0+dev.5211.gf5c0dd1 build dev' -20191213:05:03:48:014419 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Commencing Master instance shutdown with mode='immediate' -20191213:05:03:48:014419 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Master segment instance directory=/home/huanzhang/workspace/gpdb7/gpAux/gpdemo/datadirs/qddir/demoDataDir-1 -20191213:05:03:48:014419 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Attempting forceful termination of any leftover master process -20191213:05:03:48:014419 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Terminating processes for segment /home/huanzhang/workspace/gpdb7/gpAux/gpdemo/datadirs/qddir/demoDataDir-1 -20191213:05:03:48:014419 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Stopping master standby host hubert-gp-centos mode=immediate -20191213:05:03:48:014419 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Successfully shutdown standby process on hubert-gp-centos -20191213:05:03:48:014419 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Targeting dbid [2, 5, 3, 6, 4, 7] for shutdown -20191213:05:03:48:014419 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Commencing parallel primary segment instance shutdown, please wait... -20191213:05:03:48:014419 gpstop:hubert-gp-centos:huanzhang-[INFO]:-0.00% of jobs completed -20191213:05:03:49:014419 gpstop:hubert-gp-centos:huanzhang-[INFO]:-100.00% of jobs completed -20191213:05:03:49:014419 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Commencing parallel mirror segment instance shutdown, please wait... 
-20191213:05:03:49:014419 gpstop:hubert-gp-centos:huanzhang-[INFO]:-0.00% of jobs completed -20191213:05:03:50:014419 gpstop:hubert-gp-centos:huanzhang-[INFO]:-100.00% of jobs completed -20191213:05:03:50:014419 gpstop:hubert-gp-centos:huanzhang-[INFO]:----------------------------------------------------- -20191213:05:03:50:014419 gpstop:hubert-gp-centos:huanzhang-[INFO]:- Segments stopped successfully = 6 -20191213:05:03:50:014419 gpstop:hubert-gp-centos:huanzhang-[INFO]:- Segments with errors during stop = 0 -20191213:05:03:50:014419 gpstop:hubert-gp-centos:huanzhang-[INFO]:----------------------------------------------------- -20191213:05:03:50:014419 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Successfully shutdown 6 of 6 segment instances -20191213:05:03:50:014419 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Database successfully shutdown with no errors reported -20191213:05:03:50:014419 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Cleaning up leftover shared memory -20191213:05:03:50:014419 gpstop:hubert-gp-centos:huanzhang-[INFO]:-Restarting System... 
- --- end_ignore - --- after the restart we need a new connection to run the queries --- 1) single allocation --- Group Share Quota = 0 --- Global Share Quota > 0 --- Slot Quota > 0 --- ----------------------- - --- we assume system total chunks is 100% --- rg1's expected: 100% * 20% => 20% --- rg1's slot quota: 20% / 2 * 2 => 20% --- rg1's single slot quota: 20% / 2 => 10% --- rg1's shared quota: 20% - 20% => %0 --- system free chunks: 100% - 10% - 30% - 20% => 40% --- global area safe threshold: 40% / 2 = 20% -1: CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=20, memory_shared_quota=0); -CREATE -1: CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; -CREATE --- 1a) on QD -1: SET ROLE TO role1_memory_test; -SET -1: SELECT hold_memory_by_percent(1.0); - hold_memory_by_percent ------------------------- - 0 -(1 row) -1: SELECT hold_memory_by_percent(0.3); - hold_memory_by_percent ------------------------- - 0 -(1 row) -1: SELECT hold_memory_by_percent(0.3); -ERROR: Canceling query because of high VMEM usage. current group id is 806868, group memory usage 218 MB, group shared memory quota is 0 MB, slot memory quota is 68 MB, global freechunks memory is 124 MB, global safe memory threshold is 137 MB (runaway_cleaner.c:197) -CONTEXT: SQL function "hold_memory_by_percent" statement 1 -1q: ... - --- 1b) on QEs -2: SELECT pg_sleep(1); - pg_sleep ----------- - -(1 row) -2: SET ROLE TO role1_memory_test; -SET -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(1.0)=0; - count -------- - 0 -(1 row) -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.3)=0; - count -------- - 0 -(1 row) -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.3)=0; -ERROR: Canceling query because of high VMEM usage. 
current group id is 806868, group memory usage 218 MB, group shared memory quota is 0 MB, slot memory quota is 68 MB, global freechunks memory is 124 MB, global safe memory threshold is 137 MB (runaway_cleaner.c:197) (seg0 slice1 10.146.0.4:7002 pid=10883) (runaway_cleaner.c:197) -CONTEXT: SQL function "hold_memory_by_percent" statement 1 -2q: ... - -0: DROP ROLE role1_memory_test; -DROP -0: DROP RESOURCE GROUP rg1_memory_test; -DROP -0q: ... - - --- we assume system total chunks is 100% --- rg1's expected: 100% * 20% => 20% --- rg1's slot quota: 20% / 2 => 10% --- rg1's single slot quota: 10% / 2 => 5% --- rg1's shared quota: %20 - %10 => %10 --- system free chunks: 100% - 10% - 30% - 20% => 40% --- safe threshold: 40% / 2 = 20% -1: CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=20, memory_shared_quota=50); -CREATE -1: CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; -CREATE --- 1a) on QD -1: SET ROLE TO role1_memory_test; -SET -1: SELECT hold_memory_by_percent(1.0); - hold_memory_by_percent ------------------------- - 0 -(1 row) -1: SELECT hold_memory_by_percent(0.3); - hold_memory_by_percent ------------------------- - 0 -(1 row) -1: SELECT hold_memory_by_percent(0.3); - hold_memory_by_percent ------------------------- - 0 -(1 row) -1: SELECT hold_memory_by_percent(0.3); -ERROR: Canceling query because of high VMEM usage. current group id is 806877, group memory usage 259 MB, group shared memory quota is 68 MB, slot memory quota is 34 MB, global freechunks memory is 117 MB, global safe memory threshold is 137 MB (runaway_cleaner.c:197) -CONTEXT: SQL function "hold_memory_by_percent" statement 1 -1q: ... 
- --- 1b) on QEs -2: SELECT pg_sleep(1); - pg_sleep ----------- - -(1 row) -2: SET ROLE TO role1_memory_test; -SET -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(1.0)=0; - count -------- - 0 -(1 row) -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.3)=0; - count -------- - 0 -(1 row) -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.3)=0; - count -------- - 0 -(1 row) -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.3)=0; -ERROR: Canceling query because of high VMEM usage. current group id is 806877, group memory usage 259 MB, group shared memory quota is 68 MB, slot memory quota is 34 MB, global freechunks memory is 117 MB, global safe memory threshold is 137 MB (runaway_cleaner.c:197) (seg0 slice1 10.146.0.4:7002 pid=10918) (runaway_cleaner.c:197) -CONTEXT: SQL function "hold_memory_by_percent" statement 1 -2q: ... - -0: DROP ROLE role1_memory_test; -DROP -0: DROP RESOURCE GROUP rg1_memory_test; -DROP -0q: ... 
- - - --- we assume system total chunks is 100% --- rg1's expected: 100% * 20% => 20% --- rg1's slot quota: 20% / 2 => 10% --- rg1's single slot quota: 10% / 2 => 5% --- rg1's shared quota: %20 - %10 => %10 --- rg2's expected: 100% * 20% => 20% --- system free chunks: 100% - 10% - 30% - 20% - 20%=> 20% --- safe threshold: 20% / 2 = 10% -1: CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=20, memory_shared_quota=50); -CREATE -1: CREATE RESOURCE GROUP rg2_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=20, memory_shared_quota=0); -CREATE -1: CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; -CREATE --- 1a) on QD -1: SET ROLE TO role1_memory_test; -SET -1: SELECT hold_memory_by_percent(1.0); - hold_memory_by_percent ------------------------- - 0 -(1 row) -1: SELECT hold_memory_by_percent(0.15); - hold_memory_by_percent ------------------------- - 0 -(1 row) -1: SELECT hold_memory_by_percent(0.15); -ERROR: Canceling query because of high VMEM usage. current group id is 806886, group memory usage 178 MB, group shared memory quota is 68 MB, slot memory quota is 34 MB, global freechunks memory is 62 MB, global safe memory threshold is 69 MB (runaway_cleaner.c:197) -CONTEXT: SQL function "hold_memory_by_percent" statement 1 -1q: ... - --- 1b) on QEs -2: SELECT pg_sleep(1); - pg_sleep ----------- - -(1 row) -2: SET ROLE TO role1_memory_test; -SET -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(1.0)=0; - count -------- - 0 -(1 row) -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.15)=0; - count -------- - 0 -(1 row) -2: SELECT count(null) FROM gp_dist_random('gp_id') t1 WHERE hold_memory_by_percent(0.15)=0; -ERROR: Canceling query because of high VMEM usage. 
current group id is 806886, group memory usage 178 MB, group shared memory quota is 68 MB, slot memory quota is 34 MB, global freechunks memory is 62 MB, global safe memory threshold is 69 MB (runaway_cleaner.c:197) (seg0 slice1 10.146.0.4:7002 pid=10952) (runaway_cleaner.c:197) -CONTEXT: SQL function "hold_memory_by_percent" statement 1 -2q: ... - -0: DROP ROLE role1_memory_test; -DROP -0: DROP RESOURCE GROUP rg1_memory_test; -DROP -0: DROP RESOURCE GROUP rg2_memory_test; -DROP -0q: ... - --- test for the rounding issue of runaway_detector_activation_percent --- when calculating safeChunksThreshold, we used to multiply --- runaway_detector_activation_percent and then divide 100. This will --- cause the small chunks to be rounded to zero. --- set runaway_detector_activation_percent to 99 to enlarge the rounding --- issue - --- start_ignore -! gpconfig -c runaway_detector_activation_percent -v 99; -! gpstop -ari; --- end_ignore - -1: CREATE RESOURCE GROUP rg1_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=60, memory_shared_quota=50); -CREATE -1: CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; -CREATE --- trigger small chunks rounding issue by reducing memory limit in small step --- while increasing memory limit in big step. -1: ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 57; -ALTER -1: ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 54; -ALTER -1: ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 51; -ALTER -1: ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 48; -ALTER -1: ALTER RESOURCE GROUP rg1_memory_test SET MEMORY_LIMIT 60; -ALTER --- 1a) on QD -1: SET ROLE TO role1_memory_test; -SET -1: SELECT hold_memory_by_percent(0.1); - hold_memory_by_percent ------------------------- - 0 -(1 row) -1: SELECT hold_memory_by_percent(0.1); - hold_memory_by_percent ------------------------- - 0 -(1 row) -1q: ... - -0: DROP ROLE role1_memory_test; -DROP -0: DROP RESOURCE GROUP rg1_memory_test; -DROP -0q: ... 
- diff --git a/src/test/isolation2/output/resgroup/resgroup_memory_statistic.source b/src/test/isolation2/output/resgroup/resgroup_memory_statistic.source deleted file mode 100644 index 51ead15a68b..00000000000 --- a/src/test/isolation2/output/resgroup/resgroup_memory_statistic.source +++ /dev/null @@ -1,302 +0,0 @@ -DROP ROLE IF EXISTS role1_memory_test; -DROP -DROP ROLE IF EXISTS role2_memory_test; -DROP --- start_ignore -DROP RESOURCE GROUP rg1_memory_test; -ERROR: resource group "rg1_memory_test" does not exist -DROP RESOURCE GROUP rg2_memory_test; -ERROR: resource group "rg2_memory_test" does not exist --- end_ignore - -CREATE OR REPLACE FUNCTION repeatPalloc(int, int) RETURNS int AS '@abs_builddir@/../regress/regress@DLSUFFIX@', 'repeatPalloc' LANGUAGE C READS SQL DATA; -CREATE - -CREATE OR REPLACE FUNCTION hold_memory(int, int) RETURNS int AS $$ SELECT * FROM repeatPalloc(1, $2) $$ LANGUAGE sql; -CREATE - -CREATE OR REPLACE VIEW eat_memory_on_qd AS SELECT hold_memory(0,20); -CREATE - -CREATE OR REPLACE VIEW eat_memory_on_one_slice2 AS SELECT count(null) > 0 FROM gp_dist_random('gp_id') t1 WHERE hold_memory(t1.dbid,20)=0 ; -CREATE - -CREATE OR REPLACE VIEW eat_memory_on_slices2 AS SELECT count(null) > 0 FROM gp_dist_random('gp_id') t1, gp_dist_random('gp_id') t2 WHERE hold_memory(t1.dbid,20)=0 AND hold_memory(t2.dbid,20)=0 ; -CREATE - -CREATE OR REPLACE FUNCTION round_test(float, integer) RETURNS float AS $$ SELECT round($1 / $2) * $2 $$ LANGUAGE sql; -CREATE - -CREATE OR REPLACE VIEW memory_result AS SELECT rsgname, ismaster, round_test(avg(memory_usage), 10) AS avg_mem FROM( SELECT rsgname, CASE (j->'key')::text WHEN '"-1"'::text THEN 1 ELSE 0 END AS ismaster, ((j->'value')->>'used')::int AS memory_usage FROM( SELECT rsgname, row_to_json(json_each(memory_usage::json)) AS j FROM gp_toolkit.gp_resgroup_status WHERE rsgname='rg1_memory_test' OR rsgname='rg2_memory_test' )a )b GROUP BY (rsgname, ismaster) ORDER BY rsgname, ismaster; -CREATE - -CREATE RESOURCE 
GROUP rg1_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=30); -CREATE -CREATE ROLE role1_memory_test RESOURCE GROUP rg1_memory_test; -CREATE -CREATE RESOURCE GROUP rg2_memory_test WITH (concurrency=2, cpu_hard_quota_limit=10, memory_limit=30); -CREATE -CREATE ROLE role2_memory_test RESOURCE GROUP rg2_memory_test; -CREATE - -GRANT ALL ON eat_memory_on_qd TO role1_memory_test; -GRANT -GRANT ALL ON eat_memory_on_one_slice2 TO role1_memory_test; -GRANT -GRANT ALL ON eat_memory_on_slices2 TO role1_memory_test; -GRANT -GRANT ALL ON memory_result TO role1_memory_test; -GRANT - -GRANT ALL ON eat_memory_on_qd TO role2_memory_test; -GRANT -GRANT ALL ON eat_memory_on_one_slice2 TO role2_memory_test; -GRANT -GRANT ALL ON eat_memory_on_slices2 TO role2_memory_test; -GRANT -GRANT ALL ON memory_result TO role2_memory_test; -GRANT - --- 1.1) QD only in transaction -1: SET ROLE TO role1_memory_test; -SET --- check initial state -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem ------------------+----------+--------- - rg1_memory_test | 0 | 0 - rg1_memory_test | 1 | 0 - rg2_memory_test | 0 | 0 - rg2_memory_test | 1 | 0 -(4 rows) -1: BEGIN; -BEGIN -1: SELECT * FROM eat_memory_on_qd; - hold_memory -------------- - 0 -(1 row) -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem ------------------+----------+--------- - rg1_memory_test | 0 | 0 - rg1_memory_test | 1 | 20 - rg2_memory_test | 0 | 0 - rg2_memory_test | 1 | 0 -(4 rows) -1q: ... 
- --- 1.2) QD only -1: SET ROLE TO role1_memory_test; -SET --- check initial state -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem ------------------+----------+--------- - rg1_memory_test | 0 | 0 - rg1_memory_test | 1 | 0 - rg2_memory_test | 0 | 0 - rg2_memory_test | 1 | 0 -(4 rows) -1: SELECT * FROM eat_memory_on_qd; - hold_memory -------------- - 0 -(1 row) -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem ------------------+----------+--------- - rg1_memory_test | 0 | 0 - rg1_memory_test | 1 | 0 - rg2_memory_test | 0 | 0 - rg2_memory_test | 1 | 0 -(4 rows) -1q: ... - --- 2.1) QEs on one slice -1: SET ROLE TO role1_memory_test; -SET -1: SELECT * FROM eat_memory_on_one_slice2; - ?column? ----------- - f -(1 row) -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem ------------------+----------+--------- - rg1_memory_test | 0 | 0 - rg1_memory_test | 1 | 0 - rg2_memory_test | 0 | 0 - rg2_memory_test | 1 | 0 -(4 rows) -1q: ... - --- 2.2) QEs on one slice in transaction -1: SET ROLE TO role1_memory_test; -SET -1: BEGIN; -BEGIN -1: SELECT * FROM eat_memory_on_one_slice2; - ?column? ----------- - f -(1 row) -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem ------------------+----------+--------- - rg1_memory_test | 0 | 20 - rg1_memory_test | 1 | 0 - rg2_memory_test | 0 | 0 - rg2_memory_test | 1 | 0 -(4 rows) -1q: ... - --- 2.3) QEs on one slice change resource group -1: SET ROLE TO role1_memory_test; -SET -1: SELECT * FROM eat_memory_on_one_slice2; - ?column? ----------- - f -(1 row) -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem ------------------+----------+--------- - rg1_memory_test | 0 | 0 - rg1_memory_test | 1 | 0 - rg2_memory_test | 0 | 0 - rg2_memory_test | 1 | 0 -(4 rows) -1: SET ROLE TO role2_memory_test; -SET -1: SELECT * FROM eat_memory_on_one_slice2; - ?column? 
----------- - f -(1 row) -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem ------------------+----------+--------- - rg1_memory_test | 0 | 0 - rg1_memory_test | 1 | 0 - rg2_memory_test | 0 | 0 - rg2_memory_test | 1 | 0 -(4 rows) -1q: ... - --- 3) QEs on multiple slices -1: SET ROLE TO role1_memory_test; -SET -1: SELECT * FROM eat_memory_on_slices2; - ?column? ----------- - f -(1 row) -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem ------------------+----------+--------- - rg1_memory_test | 0 | 0 - rg1_memory_test | 1 | 0 - rg2_memory_test | 0 | 0 - rg2_memory_test | 1 | 0 -(4 rows) -1q: ... - --- recheck after cleanup -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem ------------------+----------+--------- - rg1_memory_test | 0 | 0 - rg1_memory_test | 1 | 0 - rg2_memory_test | 0 | 0 - rg2_memory_test | 1 | 0 -(4 rows) - --- 4) single role concurrency test -1: SET ROLE TO role1_memory_test; -SET -2: SET ROLE TO role1_memory_test; -SET --- QEs on multiple slices -1: SELECT * FROM eat_memory_on_slices2; - ?column? ----------- - f -(1 row) -2: SELECT * FROM eat_memory_on_slices2; - ?column? ----------- - f -(1 row) -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem ------------------+----------+--------- - rg1_memory_test | 0 | 0 - rg1_memory_test | 1 | 0 - rg2_memory_test | 0 | 0 - rg2_memory_test | 1 | 0 -(4 rows) -1q: ... -2q: ... - --- 5) multi role concurrency test -1: SET ROLE TO role1_memory_test; -SET -2: SET ROLE TO role2_memory_test; -SET --- QEs on multiple slices -1: SELECT * FROM eat_memory_on_slices2; - ?column? ----------- - f -(1 row) -2: SELECT * FROM eat_memory_on_slices2; - ?column? ----------- - f -(1 row) -SELECT * FROM memory_result; - rsgname | ismaster | avg_mem ------------------+----------+--------- - rg1_memory_test | 0 | 0 - rg1_memory_test | 1 | 0 - rg2_memory_test | 0 | 0 - rg2_memory_test | 1 | 0 -(4 rows) -1q: ... -2q: ... 
- --- cleanup -REVOKE ALL ON eat_memory_on_qd FROM role1_memory_test; -REVOKE -REVOKE ALL ON eat_memory_on_one_slice2 FROM role1_memory_test; -REVOKE -REVOKE ALL ON eat_memory_on_slices2 FROM role1_memory_test; -REVOKE -REVOKE ALL ON memory_result FROM role1_memory_test; -REVOKE - -REVOKE ALL ON eat_memory_on_qd FROM role2_memory_test; -REVOKE -REVOKE ALL ON eat_memory_on_one_slice2 FROM role2_memory_test; -REVOKE -REVOKE ALL ON eat_memory_on_slices2 FROM role2_memory_test; -REVOKE -REVOKE ALL ON memory_result FROM role2_memory_test; -REVOKE - -ALTER ROLE role1_memory_test RESOURCE GROUP none; -ALTER -ALTER ROLE role2_memory_test RESOURCE GROUP none; -ALTER - -DROP ROLE role1_memory_test; -DROP -DROP ROLE role2_memory_test; -DROP -DROP RESOURCE GROUP rg1_memory_test; -DROP -DROP RESOURCE GROUP rg2_memory_test; -DROP -DROP VIEW memory_result; -DROP diff --git a/src/test/isolation2/sql/resgroup/resgroup_alter_concurrency.sql b/src/test/isolation2/sql/resgroup/resgroup_alter_concurrency.sql index 9bd71776317..0e7304ef3ac 100644 --- a/src/test/isolation2/sql/resgroup/resgroup_alter_concurrency.sql +++ b/src/test/isolation2/sql/resgroup/resgroup_alter_concurrency.sql @@ -61,14 +61,13 @@ ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 2; 21:SET ROLE role_concurrency_test; 22:SET ROLE role_concurrency_test; -21&:BEGIN; +21:BEGIN; 22&:BEGIN; SELECT * FROM rg_activity_status; 11:END; 11q: -21<: 22<: SELECT * FROM rg_activity_status; @@ -232,3 +231,4 @@ DROP VIEW rg_activity_status; DROP ROLE role_concurrency_test; DROP RESOURCE GROUP rg_concurrency_test; -- end_ignore + diff --git a/src/test/isolation2/sql/resgroup/resgroup_parallel_queries.sql b/src/test/isolation2/sql/resgroup/resgroup_parallel_queries.sql deleted file mode 100644 index 685fd199f14..00000000000 --- a/src/test/isolation2/sql/resgroup/resgroup_parallel_queries.sql +++ /dev/null @@ -1,269 +0,0 @@ --- start_matchsubs --- m/ERROR: tuple concurrently updated \(heapam\.c\:\d+\)/ --- 
s/\(heapam\.c:\d+\)// --- end_matchsubs -CREATE EXTENSION dblink; - --- This function execute commands N times. --- % in command will be replaced by number specified by range1 sequentially --- # in command will be replaced by number specified by range2 randomly --- range, eg: 1-10 --- Notice: now it only support SELECT statement return single integer -CREATE or replace FUNCTION exec_commands_n /*in func*/ - (dl_name text, command1 text, /*in func*/ - command2 text, command3 text, /*in func*/ - times integer, range1 text, range2 text, fail_on_error bool) /*in func*/ -RETURNS integer AS $$ /*in func*/ -DECLARE /*in func*/ - cmd text; /*in func*/ - res int; /*in func*/ - s_r1 int; /*in func*/ - e_r1 int; /*in func*/ - s_r2 int; /*in func*/ - e_r2 int; /*in func*/ -BEGIN /*in func*/ - s_r1 = 0; /*in func*/ - e_r1 = 0; /*in func*/ - s_r2 = 0; /*in func*/ - e_r2 = 0; /*in func*/ - IF length(range1) > 0 THEN /*in func*/ - select t[1]::int, t[2]::int into s_r1, e_r1 from regexp_split_to_array(range1, '-') t; /*in func*/ - END IF; /*in func*/ - IF length(range2) > 0 THEN /*in func*/ - select t[1]::int, t[2]::int into s_r2, e_r2 from regexp_split_to_array(range2, '-') t; /*in func*/ - END IF; /*in func*/ - FOR i IN 0..(times - 1) LOOP /*in func*/ - IF length(command1) > 0 THEN /*in func*/ - cmd = regexp_replace(command1, '%', (s_r1 + i % (e_r1 - s_r1 + 1))::text, 'g'); /*in func*/ - cmd = regexp_replace(cmd, '#', (s_r2 + ((random()*100)::int) % (e_r2 - s_r2 + 1))::text, 'g'); /*in func*/ - RAISE NOTICE '%', cmd; /*in func*/ - IF lower(cmd) like 'select %' THEN /*in func*/ - select * into res from dblink(dl_name, cmd, fail_on_error) t(c1 integer); /*in func*/ - ELSE /*in func*/ - perform dblink_exec(dl_name, cmd , fail_on_error); /*in func*/ - END IF; /*in func*/ - END IF; /*in func*/ - IF length(command2) > 0 THEN /*in func*/ - cmd = regexp_replace(command2, '%', (s_r1 + i % (e_r1 - s_r1 + 1))::text, 'g'); /*in func*/ - cmd = regexp_replace(cmd, '#', (s_r2 + 
((random()*100)::int) % (e_r2 - s_r2 + 1))::text, 'g'); /*in func*/ - RAISE NOTICE '%', cmd; /*in func*/ - IF lower(cmd) like 'select %' THEN /*in func*/ - select * into res from dblink(dl_name, cmd, fail_on_error) t(c1 integer); /*in func*/ - ELSE /*in func*/ - perform dblink_exec(dl_name, cmd, fail_on_error); /*in func*/ - END IF; /*in func*/ - END IF; /*in func*/ - IF length(command3) > 0 THEN /*in func*/ - cmd = regexp_replace(command3, '%', (s_r1 + i % (e_r1 - s_r1 + 1))::text, 'g'); /*in func*/ - cmd = regexp_replace(cmd, '#', (s_r2 + ((random()*100)::int) % (e_r2 - s_r2 + 1))::text, 'g'); /*in func*/ - RAISE NOTICE '%', cmd; /*in func*/ - IF lower(cmd) like 'select %' THEN /*in func*/ - select * into res from dblink(dl_name, cmd, fail_on_error) t(c1 integer); /*in func*/ - ELSE /*in func*/ - perform dblink_exec(dl_name, cmd, fail_on_error); /*in func*/ - END IF; /*in func*/ - END IF; /*in func*/ - END LOOP; /*in func*/ - return times; /*in func*/ -END;$$ /*in func*/ -LANGUAGE 'plpgsql'; - --- --- DDLs vs DDLs --- -1:select dblink_connect('dblink_rg_test1', 'dbname=isolation2resgrouptest'); -2:select dblink_connect('dblink_rg_test2', 'dbname=isolation2resgrouptest'); -3:select dblink_connect('dblink_rg_test3', 'dbname=isolation2resgrouptest'); -4:select dblink_connect('dblink_rg_test4', 'dbname=isolation2resgrouptest'); -5:select dblink_connect('dblink_rg_test5', 'dbname=isolation2resgrouptest'); -6:select dblink_connect('dblink_rg_test6', 'dbname=isolation2resgrouptest'); - -1>:select exec_commands_n('dblink_rg_test1','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, cpu_hard_quota_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER RESOURCE GROUP rg_test_g# set concurrency #', 60, '', '1-6', false); -2>:select exec_commands_n('dblink_rg_test2','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, cpu_hard_quota_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER RESOURCE GROUP rg_test_g# set concurrency#', 60, '', '1-6', false); -3>:select 
exec_commands_n('dblink_rg_test3','CREATE RESOURCE GROUP rg_test_g# WITH (concurrency=#, cpu_hard_quota_limit=#)', 'DROP RESOURCE GROUP rg_test_g#', 'ALTER RESOURCE GROUP rg_test_g# set cpu_hard_quota_limit #', 60, '', '1-6', false); - -1<: -2<: -3<: -4<: -5<: -6<: - -1: select dblink_disconnect('dblink_rg_test1'); -2: select dblink_disconnect('dblink_rg_test2'); -3: select dblink_disconnect('dblink_rg_test3'); -4: select dblink_disconnect('dblink_rg_test4'); -5: select dblink_disconnect('dblink_rg_test5'); -6: select dblink_disconnect('dblink_rg_test6'); - - -1q: -2q: -3q: -4q: -5q: -6q: --- --- DDLs vs DMLs --- --- Prepare resource groups and roles and tables -create table rg_test_foo as select i as c1, i as c2 from generate_series(1,1000) i; -create table rg_test_bar as select i as c1, i as c2 from generate_series(1,1000) i; -grant all on rg_test_foo to public; -grant all on rg_test_bar to public; - --- start_ignore -select dblink_connect('dblink_rg_test', 'dbname=isolation2resgrouptest'); -select exec_commands_n('dblink_rg_test','DROP ROLE rg_test_r%', '', '', 7, '1-7', '', false); -select exec_commands_n('dblink_rg_test','DROP RESOURCE GROUP rg_test_g%', '', '', 7, '1-7', '', false); --- end_ignore - --- create 6 roles and 6 resource groups -select exec_commands_n('dblink_rg_test','CREATE RESOURCE GROUP rg_test_g% WITH (concurrency=9, cpu_hard_quota_limit=1)', '', '', 6, '1-6', '', true); -select exec_commands_n('dblink_rg_test','CREATE ROLE rg_test_r% login resource group rg_test_g%;', '', '', 6, '1-6', '', true); -select exec_commands_n('dblink_rg_test','GRANT ALL ON rg_test_foo to rg_test_r%;', '', '', 6, '1-6', '', true); -select exec_commands_n('dblink_rg_test','GRANT ALL ON rg_test_bar to rg_test_r%;', '', '', 6, '1-6', '', true); - -select dblink_disconnect('dblink_rg_test'); - -select groupname, concurrency, cpu_hard_quota_limit from gp_toolkit.gp_resgroup_config where groupname like 'rg_test_g%' order by groupname; - --- --- 2* : DMLs --- --- start 6 
session to concurrently change resource group and run simple queries randomly --- BEGIN/END -21: select dblink_connect('dblink_rg_test21', 'dbname=isolation2resgrouptest'); -21>: select exec_commands_n('dblink_rg_test21', 'set role rg_test_r#', 'BEGIN', 'END', 24000, '', '1-6', true); --- BEGIN/ABORT -22: select dblink_connect('dblink_rg_test22', 'dbname=isolation2resgrouptest'); -22>: select exec_commands_n('dblink_rg_test22', 'set role rg_test_r#', 'BEGIN', 'ABORT', 24000, '', '1-6', true); --- query with memory sensitive node -23: select dblink_connect('dblink_rg_test23', 'dbname=isolation2resgrouptest'); -23>: select exec_commands_n('dblink_rg_test23', 'set role rg_test_r#', 'insert into rg_test_foo values (#, #)', 'select count(*) from rg_test_bar t1, rg_test_foo t2 where t1.c2=t2.c2 group by t1.c2', 3000, '', '1-6', true); --- high cpu -24: select dblink_connect('dblink_rg_test24', 'dbname=isolation2resgrouptest'); -24>: select exec_commands_n('dblink_rg_test24', 'set role rg_test_r#', 'insert into rg_test_bar values (#, #)', 'select count(*) from rg_test_bar where c2! 
= 1000', 60, '', '1-6', true); --- simple select -25: select dblink_connect('dblink_rg_test25', 'dbname=isolation2resgrouptest'); -25>: select exec_commands_n('dblink_rg_test25', 'set role rg_test_r#', 'select count(*) from rg_test_foo', 'select count(*) from rg_test_bar', 6000, '', '1-6', true); --- vacuum -26: select dblink_connect('dblink_rg_test26', 'dbname=isolation2resgrouptest'); -26>: select exec_commands_n('dblink_rg_test26', 'set role rg_test_r#', 'vacuum rg_test_bar', 'vacuum rg_test_foo', 6000, '', '1-6', true); - --- --- 3* : Alter groups --- --- start a new session to alter concurrency randomly -31: select dblink_connect('dblink_rg_test31', 'dbname=isolation2resgrouptest'); -31>: select exec_commands_n('dblink_rg_test31', 'alter resource group rg_test_g% set concurrency #', 'select 1 from pg_sleep(0.1)', '', 1000, '1-6', '0-5', true); - --- start a new session to alter cpu_hard_quota_limit randomly -32: select dblink_connect('dblink_rg_test32', 'dbname=isolation2resgrouptest'); -32>: select exec_commands_n('dblink_rg_test32', 'alter resource group rg_test_g% set cpu_hard_quota_limit #', 'select 1 from pg_sleep(0.1)', '', 1000, '1-6', '1-6', true); - - --- --- 4* : CREATE/DROP tables & groups --- --- start a new session to create and drop table, it will cause massive catchup interrupt. 
-41: select dblink_connect('dblink_rg_test41', 'dbname=isolation2resgrouptest'); -41>: select exec_commands_n('dblink_rg_test41', 'drop table if exists rg_test_t%', 'create table rg_test_t% (c1 int, c2 int)' ,'', 3000, '1-6', '', true); - --- start a new session to create & drop resource group -42: select dblink_connect('dblink_rg_test42', 'dbname=isolation2resgrouptest'); -42>: select exec_commands_n('dblink_rg_test42', 'create resource group rg_test_g7 with (cpu_hard_quota_limit=1)', 'drop resource group rg_test_g7', '', 1000, '', '', true); - -31<: -31: select exec_commands_n('dblink_rg_test31', 'alter resource group rg_test_g% set concurrency #', 'select 1 from pg_sleep(0.1)', '', 6, '1-6', '1-5', true); - --- start a new session to acquire the status of resource groups -44: select dblink_connect('dblink_rg_test44', 'dbname=isolation2resgrouptest'); -44>: select exec_commands_n('dblink_rg_test44', 'select count(*) from gp_toolkit.gp_resgroup_status;', '', '', 100, '', '', true); - --- wait all sessions to finish -21<: -22<: -23<: -24<: -25<: -26<: -32<: -33<: -34<: -41<: -42<: -44<: - -21: select dblink_disconnect('dblink_rg_test21'); -22: select dblink_disconnect('dblink_rg_test22'); -23: select dblink_disconnect('dblink_rg_test23'); -24: select dblink_disconnect('dblink_rg_test24'); -25: select dblink_disconnect('dblink_rg_test25'); -26: select dblink_disconnect('dblink_rg_test26'); -31: select dblink_disconnect('dblink_rg_test31'); -32: select dblink_disconnect('dblink_rg_test32'); -33: select dblink_disconnect('dblink_rg_test33'); -34: select dblink_disconnect('dblink_rg_test34'); -41: select dblink_disconnect('dblink_rg_test41'); -42: select dblink_disconnect('dblink_rg_test42'); -44: select dblink_disconnect('dblink_rg_test44'); - -21q: -22q: -23q: -24q: -25q: -26q: -31q: -32q: -33q: -34q: -41q: -42q: - -select groupname, concurrency::int < 7, cpu_hard_quota_limit::int < 7 from gp_toolkit.gp_resgroup_config where groupname like 'rg_test_g%' order by 
groupname; - --- Beacuse concurrency of each resource group is changed between 1..6, so the num_queued must be larger than 0 -select num_queued > 0 from gp_toolkit.gp_resgroup_status where rsgname like 'rg_test_g%' order by rsgname; - --- start_ignore -drop table rg_test_foo; -drop table rg_test_bar; -select dblink_connect('dblink_rg_test', 'dbname=isolation2resgrouptest'); -select exec_commands_n('dblink_rg_test','DROP ROLE rg_test_r%', '', '', 6, '1-6', '', true); -select exec_commands_n('dblink_rg_test','DROP RESOURCE GROUP rg_test_g%', '', '', 6, '1-6', '', true); -select dblink_disconnect('dblink_rg_test'); --- end_ignore - --- --- 5*: Test connections in utility mode are not governed by resource group --- -create resource group rg_test_g8 with (concurrency= 1, cpu_hard_quota_limit=1); -create role rg_test_r8 login resource group rg_test_g8; -51:select dblink_connect('dblink_rg_test51', 'dbname=isolation2resgrouptest user=rg_test_r8 options=''-c gp_role=utility'''); -52:select dblink_connect('dblink_rg_test52', 'dbname=isolation2resgrouptest user=rg_test_r8 options=''-c gp_role=utility'''); -53:select dblink_connect('dblink_rg_test53', 'dbname=isolation2resgrouptest user=rg_test_r8 options=''-c gp_role=utility'''); - -51>:select exec_commands_n('dblink_rg_test51', 'select 1', 'begin', 'end', 100, '', '', true); -51<: -52>:select exec_commands_n('dblink_rg_test52', 'select 1', 'select 1', 'select 1', 100, '', '', true); -52<: -53>:select exec_commands_n('dblink_rg_test53', 'select 1', 'begin', 'abort', 100, '', '', true); -53<: - -51: select dblink_disconnect('dblink_rg_test51'); -52: select dblink_disconnect('dblink_rg_test52'); -53: select dblink_disconnect('dblink_rg_test53'); - -51q: -52q: -53q: - --- num_executed and num_queued must be zero -select num_queued, num_executed from gp_toolkit.gp_resgroup_status where rsgname = 'rg_test_g8'; -drop role rg_test_r8; -drop resource group rg_test_g8; - --- clean up -select * from gp_toolkit.gp_resgroup_config; diff 
--git a/src/test/isolation2/sql/resgroup/restore_default_resgroup.sql b/src/test/isolation2/sql/resgroup/restore_default_resgroup.sql deleted file mode 100644 index 312878d155e..00000000000 --- a/src/test/isolation2/sql/resgroup/restore_default_resgroup.sql +++ /dev/null @@ -1,18 +0,0 @@ --- enable resource group and restart cluster. --- start_ignore -! gpconfig -c gp_resource_group_cpu_limit -v 0.9; -! gpconfig -c gp_resource_manager -v group; - --- 40 should be enough for the following cases and some --- weak test agents may not adopt a higher max_connections -! gpconfig -c max_connections -v 100 -m 40; -! gpstop -rai; --- end_ignore - -show gp_resource_manager; -show gp_resource_group_cpu_limit; -show max_connections; - --- by default admin_group has concurrency set to -1 which leads to --- very small memory quota for each resgroup slot, correct it. -ALTER RESOURCE GROUP admin_group SET concurrency 40; From ae7f5a8750e0598a19ad58f3bd9387d462291436 Mon Sep 17 00:00:00 2001 From: Zhenglong Li Date: Tue, 10 Jan 2023 14:24:35 +0800 Subject: [PATCH 40/46] Not grab distributed snapshot if it's direct dispatch (#13991) Currently, we will create a distributed snapshot in the function GetSnapshotData() if we are QD, and we will iterate procArray again to get the global xmin/xmax/xip. But if the current query could be dispatched to a single segment directly, which means it's a direct dispatch, there is no need to create a distributed snapshot, the local snapshot is enough. 
--- src/backend/cdb/cdbtm.c | 3 + src/backend/storage/ipc/procarray.c | 5 +- src/backend/tcop/pquery.c | 22 ++++++++ src/include/cdb/cdbtm.h | 4 ++ .../input/distributed_snapshot.source | 56 ++++++++++++++++--- .../output/distributed_snapshot.source | 33 +++++++++++ 6 files changed, 114 insertions(+), 9 deletions(-) diff --git a/src/backend/cdb/cdbtm.c b/src/backend/cdb/cdbtm.c index acd6168cc9a..302d310f27d 100644 --- a/src/backend/cdb/cdbtm.c +++ b/src/backend/cdb/cdbtm.c @@ -59,6 +59,8 @@ #include "utils/snapmgr.h" #include "utils/memutils.h" +#include "nodes/plannodes.h" + typedef struct TmControlBlock { bool DtmStarted; @@ -86,6 +88,7 @@ uint32 *shmNextSnapshotId; slock_t *shmGxidGenLock; int max_tm_gxacts = 100; +bool needDistributedSnapshot = true; int gp_gxid_prefetch_num; #define GXID_PRETCH_THRESHOLD (gp_gxid_prefetch_num>>1) diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 3ff56accb7b..80c46d314ca 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -3244,8 +3244,9 @@ GetSnapshotData(Snapshot snapshot, DtxContext distributedTransactionContext) if (!TransactionIdIsValid(MyProc->xmin)) MyProc->xmin = TransactionXmin = xmin; - /* GP: QD takes a distributed snapshot */ - if (distributedTransactionContext == DTX_CONTEXT_QD_DISTRIBUTED_CAPABLE && !Debug_disable_distributed_snapshot) + /* GP: QD takes a distributed snapshot iff QD not in retry phase and the query needs distributed snapshot */ + if (distributedTransactionContext == DTX_CONTEXT_QD_DISTRIBUTED_CAPABLE && !Debug_disable_distributed_snapshot + && needDistributedSnapshot) { CreateDistributedSnapshot(ds); snapshot->haveDistribSnapshot = true; diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c index 3974f6ad0d2..e588833dc6b 100644 --- a/src/backend/tcop/pquery.c +++ b/src/backend/tcop/pquery.c @@ -32,6 +32,7 @@ #include "utils/snapmgr.h" #include "cdb/ml_ipc.h" +#include "cdb/cdbtm.h" #include 
"commands/createas.h" #include "commands/queue.h" #include "commands/createas.h" @@ -593,12 +594,33 @@ PortalStart(Portal portal, ParamListInfo params, { case PORTAL_ONE_SELECT: + /* + * GPDB: If we just have one motion and slices[1] can be direct dispatched, + * we do not need to grab distributed snapshot on QD, the local snapshot on + * QE is enough if we meet direct dispatch. + * + * This could improve some efficiency on OLTP. + */ + if (Gp_role == GP_ROLE_DISPATCH && !IsInTransactionBlock(true) && !snapshot) + { + /* check whether we need to create distributed snapshot */ + int determinedSliceIndex = 1; + PlannedStmt *pstmt = linitial_node(PlannedStmt, portal->stmts); + + if (pstmt->numSlices == 2 && + pstmt->slices[determinedSliceIndex].directDispatch.isDirectDispatch) + needDistributedSnapshot = false; + } + /* Must set snapshot before starting executor. */ if (snapshot) PushActiveSnapshot(snapshot); else PushActiveSnapshot(GetTransactionSnapshot()); + /* reset value */ + needDistributedSnapshot = true; + /* * We could remember the snapshot in portal->portalSnapshot, * but presently there seems no need to, as this code path diff --git a/src/include/cdb/cdbtm.h b/src/include/cdb/cdbtm.h index a0dd0886c8b..7bcb8196a74 100644 --- a/src/include/cdb/cdbtm.h +++ b/src/include/cdb/cdbtm.h @@ -277,6 +277,10 @@ typedef enum extern int max_tm_gxacts; extern int gp_gxid_prefetch_num; +/* whether we need a distributed snapshot or not, updated before each + * query been dispatched. 
*/ +extern bool needDistributedSnapshot; + extern DtxContext DistributedTransactionContext; /* state variables for how much of the log file has been flushed */ diff --git a/src/test/isolation2/input/distributed_snapshot.source b/src/test/isolation2/input/distributed_snapshot.source index f15fecd39a8..a587e8e4b9a 100644 --- a/src/test/isolation2/input/distributed_snapshot.source +++ b/src/test/isolation2/input/distributed_snapshot.source @@ -316,7 +316,7 @@ create table distributed_snapshot_fix1(a int); -- Details about how we consume it: -- 1. Using test_consume_xids to consume what's needed - 2; -- 2. The current transaction consumes 1 xid; --- 3. Use another transaction to consume 1 more. This is to mark the last +-- 3. Use another transaction to consume 1 more. This is to mark the last -- one completed so that after restart we can start from that. 1U: begin; 1U: select test_consume_xids((131070 - (cur % 131072))::int) from txid_current() cur; @@ -331,22 +331,64 @@ create table distributed_snapshot_fix1(a int); -- would successfully truncate the current working segment. select pg_ctl(datadir, 'restart') from gp_segment_configuration where role = 'p' and content = 1; --- Do a SELECT. This assigns distributed snapshot but it won't assign new xid. --- Since we'll advance to the next future xid which is the first xid of the next segment, --- this will get all DLOG segments truncated. +-- Do a SELECT. This assigns distributed snapshot but it won't assign new xid. +-- Since we'll advance to the next future xid which is the first xid of the next segment, +-- this will get all DLOG segments truncated. 1: select * from distributed_snapshot_fix1; --- Checking the DLOG segments we have right now, which is none. +-- Checking the DLOG segments we have right now, which is none. 1U: select count(*) from gp_distributed_log; 1Uq: 1q: --- Restart server again. Previously DistributedLogShared->oldestXmin is initialized to +-- Restart server again. 
Previously DistributedLogShared->oldestXmin is initialized to -- latestCompletedXid. select pg_ctl(datadir, 'restart') from gp_segment_configuration where role = 'p' and content = 1; --- Do a SELECT. Previously this would complain about missing segment file because we've +-- Do a SELECT. Previously this would complain about missing segment file because we've -- truncated the segment that latestCompletedXid is on. Now we don't, because we will -- be advancing from latestCompletedXid + 1. 1: select * from distributed_snapshot_fix1; + + +-- test the distributed snapshot in the situation of direct dispatch +0: create or replace language plpython3u; + +0: create table direct_dispatch_snapshot_alpha(a int, b int); +0: insert into direct_dispatch_snapshot_alpha select i, i from generate_series(1, 10) i; + +0: create function distributed_snapshot_test(is_direct_dispatch boolean) returns boolean as $$ +import re +from pg import DB +from copy import deepcopy + +dbname = plpy.execute("select current_database() db")[0]["db"] +db = DB(dbname=dbname) + +info_results = [] +db.set_notice_receiver(lambda n: info_results.append(deepcopy(n.message))) + +db.query("set Debug_print_full_dtm = on") +db.query("set client_min_messages = log") + +if is_direct_dispatch: + # should not create distributed snapshot + db.query("select * from direct_dispatch_snapshot_alpha where a = 6") +else: + # should create distributed snapshot + db.query("select * from direct_dispatch_snapshot_alpha where b = 6") + +info_string = ", ".join(info_results) +res = re.findall(r"Got distributed snapshot from CreateDistributedSnapshot", info_string) + +if (is_direct_dispatch and len(res) == 1) or (not is_direct_dispatch and len(res) == 2): + return True + +return False + +$$ language plpython3u; + +0: select distributed_snapshot_test(true); +0: select distributed_snapshot_test(false); + diff --git a/src/test/isolation2/output/distributed_snapshot.source b/src/test/isolation2/output/distributed_snapshot.source index 
9a03cc04099..4f47a0ff034 100644 --- a/src/test/isolation2/output/distributed_snapshot.source +++ b/src/test/isolation2/output/distributed_snapshot.source @@ -610,3 +610,36 @@ select pg_ctl(datadir, 'restart') from gp_segment_configuration where role = 'p' --- 1 (1 row) + + +-- test the distributed snapshot in the situation of direct dispatch +0: create or replace language plpython3u; +CREATE + +0: create table direct_dispatch_snapshot_alpha(a int, b int); +CREATE +0: insert into direct_dispatch_snapshot_alpha select i, i from generate_series(1, 10) i; +INSERT 10 + +0: create function distributed_snapshot_test(is_direct_dispatch boolean) returns boolean as $$ import re from pg import DB from copy import deepcopy +dbname = plpy.execute("select current_database() db")[0]["db"] db = DB(dbname=dbname) +info_results = [] db.set_notice_receiver(lambda n: info_results.append(deepcopy(n.message))) +db.query("set Debug_print_full_dtm = on") db.query("set client_min_messages = log") +if is_direct_dispatch: # should not create distributed snapshot db.query("select * from direct_dispatch_snapshot_alpha where a = 6") else: # should create distributed snapshot db.query("select * from direct_dispatch_snapshot_alpha where b = 6") +info_string = ", ".join(info_results) res = re.findall(r"Got distributed snapshot from CreateDistributedSnapshot", info_string) +if (is_direct_dispatch and len(res) == 1) or (not is_direct_dispatch and len(res) == 2): return True +return False +$$ language plpython3u; +CREATE + +0: select distributed_snapshot_test(true); + distributed_snapshot_test +--------------------------- + t +(1 row) +0: select distributed_snapshot_test(false); + distributed_snapshot_test +--------------------------- + t +(1 row) + From 47a80b7952db77d16aaa0c9113250aaa7c48f1c8 Mon Sep 17 00:00:00 2001 From: airfan Date: Tue, 10 Jan 2023 20:03:40 +0800 Subject: [PATCH 41/46] Change some resource group variables type to int64 to avoid overflow. 
Change of totalQueued and totalExecuted from int to int64 to avoid overflow after long running. Co-authored-by: huaxi.shx --- src/backend/utils/resgroup/resgroup.c | 4 ++-- src/include/catalog/catversion.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/backend/utils/resgroup/resgroup.c b/src/backend/utils/resgroup/resgroup.c index 1aa1ba4fa6c..6a857d13662 100644 --- a/src/backend/utils/resgroup/resgroup.c +++ b/src/backend/utils/resgroup/resgroup.c @@ -139,8 +139,8 @@ struct ResGroupData volatile int nRunning; /* number of running trans */ volatile int nRunningBypassed; /* number of running trans in bypass mode */ - int totalExecuted; /* total number of executed trans */ - int totalQueued; /* total number of queued trans */ + int64 totalExecuted; /* total number of executed trans */ + int64 totalQueued; /* total number of queued trans */ int64 totalQueuedTimeMs; /* total queue time, in milliseconds */ PROC_QUEUE waitProcs; /* list of PGPROC objects waiting on this group */ diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index ca6c907dbe2..1acb192ff70 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -56,6 +56,6 @@ */ /* 3yyymmddN */ -#define CATALOG_VERSION_NO 302208261 +#define CATALOG_VERSION_NO 302301101 #endif From c445a86997111ced69a260ca4759b295f2371a63 Mon Sep 17 00:00:00 2001 From: airfan Date: Wed, 11 Jan 2023 21:35:36 +0800 Subject: [PATCH 42/46] Fix resgroup init error when there is a lot of cores in cpuset.cpus. When gpdb calls InitResGroups to init a postgres backend, readStr is called to read cpuset assigned to gpdb. However the size of data buffer in readStr is too small, cpuset string readed by gpdb is truncated. 
This commit change the buffer size from MAX_INT_STRING_LEN(20) to MAX_CGROUP_CONTENTLEN(1024) to fix resgroup init error when there is a lot of cores in cpuset.cpus Co-authored-by: huaxi.shx --- src/backend/utils/resgroup/cgroup.c | 2 +- src/include/utils/cgroup.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/backend/utils/resgroup/cgroup.c b/src/backend/utils/resgroup/cgroup.c index c0ce70ebbff..ecc87928cda 100644 --- a/src/backend/utils/resgroup/cgroup.c +++ b/src/backend/utils/resgroup/cgroup.c @@ -394,7 +394,7 @@ void readStr(Oid group, BaseDirType base, CGroupComponentType component, const char *filename, char *str, int len) { - char data[MAX_INT_STRING_LEN]; + char data[MAX_CGROUP_CONTENTLEN]; size_t data_size = sizeof(data); char path[MAX_CGROUP_PATHLEN]; size_t path_size = sizeof(path); diff --git a/src/include/utils/cgroup.h b/src/include/utils/cgroup.h index 3702df3dabc..abd58c9a44b 100644 --- a/src/include/utils/cgroup.h +++ b/src/include/utils/cgroup.h @@ -25,6 +25,7 @@ #define GPDB_SYSTEM_CGROUP 6441 #define MAX_CGROUP_PATHLEN 256 +#define MAX_CGROUP_CONTENTLEN 1024 #define CGROUP_ERROR(...) elog(ERROR, __VA_ARGS__) #define CGROUP_CONFIG_ERROR(...) \ From a1c1dd7d029822b4ae68e450dfeb980c4e6e63c5 Mon Sep 17 00:00:00 2001 From: QingMa Date: Thu, 12 Jan 2023 09:35:07 +0800 Subject: [PATCH 43/46] Set sync/unsync flag for gucs (#13948) The GUC's name must be populated into `sync_guc_name.h` if it needs to sync value between QD and QEs. QD will dispatch its current synced GUC values (as startup options) to create QEs. Otherwise, the settings will not take effect on the newly created QE. 
An example of GUC inconsistency between QD and QE: ``` CREATE OR REPLACE FUNCTION cleanupAllGangs() RETURNS BOOL AS '@abs_builddir@/../regress/regress.so', 'cleanupAllGangs' LANGUAGE C; CREATE OR REPLACE FUNCTION public.segment_setting(guc text) RETURNS SETOF text EXECUTE ON ALL SEGMENTS AS $$ BEGIN RETURN NEXT pg_catalog.current_setting(guc); END $$ LANGUAGE plpgsql; postgres=# show allow_segment_DML; allow_segment_DML ------------------- off (1 row) postgres=# set allow_segment_DML = on; SET postgres=# show allow_segment_DML; allow_segment_DML ------------------- on (1 row) postgres=# select public.segment_setting('allow_segment_DML'); segment_setting ----------------- on on on (3 rows) postgres=# select cleanupAllGangs(); cleanupallgangs ----------------- t (1 row) postgres=# show allow_segment_DML; allow_segment_DML ------------------- on (1 row) postgres=# select public.segment_setting('allow_segment_DML'); segment_setting ----------------- off off off (3 rows) ``` --- src/include/utils/sync_guc_name.h | 73 +++++++++++++++++++++++------ src/include/utils/unsync_guc_name.h | 67 +++++--------------------- 2 files changed, 72 insertions(+), 68 deletions(-) diff --git a/src/include/utils/sync_guc_name.h b/src/include/utils/sync_guc_name.h index 415e248ed5a..7c043533f7d 100644 --- a/src/include/utils/sync_guc_name.h +++ b/src/include/utils/sync_guc_name.h @@ -9,13 +9,34 @@ *-------------------------------------------------------------------------- */ /* items in this file should be ordered */ + "allow_segment_DML", + "allow_system_table_mods", + "application_name", + "array_nulls", "backtrace_functions", "bytea_output", + "backend_flush_after", + "backslash_quote", "client_min_messages", "commit_delay", "commit_siblings", "coredump_on_memerror", "DateStyle", + "Debug_appendonly_print_append_block", + "debug_appendonly_print_blockdirectory", + "debug_appendonly_print_compaction", + "debug_appendonly_print_datumstream", + "debug_appendonly_print_delete", + 
"debug_appendonly_print_insert", + "debug_appendonly_print_insert_tuple", + "Debug_appendonly_print_read_block", + "debug_appendonly_print_scan", + "debug_appendonly_print_scan_tuple", + "debug_appendonly_print_segfile_choice", + "debug_appendonly_print_storage_headers", + "debug_appendonly_print_verify_write_block", + "debug_appendonly_print_visimap", + "debug_appendonly_use_no_toast", "debug_discard_caches", "default_table_access_method", "default_index_access_method", @@ -29,11 +50,17 @@ "gin_fuzzy_search_limit", "gin_pending_list_limit", "gp_allow_date_field_width_5digits", + "gp_allow_rename_relation_without_lock", + "gp_appendonly_compaction", + "gp_appendonly_compaction_threshold", + "gp_appendonly_verify_block_checksums", + "gp_appendonly_verify_write_block", "gp_blockdirectory_entry_min_range", "gp_blockdirectory_minipage_size", "gp_debug_linger", "gp_default_storage_options", "gp_disable_tuple_hints", + "gp_enable_interconnect_aggressive_retry", "gp_enable_runtime_filter", "gp_enable_segment_copy_checking", "gp_external_enable_filter_pushdown", @@ -44,6 +71,7 @@ "gp_indexcheck_insert", "gp_initial_bad_row_limit", "gp_interconnect_address_type", + "gp_interconnect_cache_future_packets", "gp_interconnect_debug_retry_interval", "gp_interconnect_default_rtt", "gp_interconnect_fc_method", @@ -66,7 +94,6 @@ "gp_log_stack_trace_lines", "gp_log_suboverflow_statement", "gp_max_packet_size", - "gp_max_slices", "gp_motion_slice_noop", "gp_resgroup_debug_wait_queue", "gp_resqueue_memory_policy_auto_fixed_mem", @@ -81,13 +108,13 @@ "gp_udpic_fault_inject_bitmap", "gp_udpic_fault_inject_percent", "gp_udpic_network_disable_ipv6", - "gp_vmem_idle_resource_timeout", "gp_workfile_caching_loglevel", "gp_workfile_compression", "gp_workfile_limit_files_per_query", "gp_workfile_limit_per_query", "hash_mem_multiplier", - "idle_in_transaction_session_timeout", + "ignore_system_indexes", + "ignore_checksum_failure", "IntervalStyle", "jit", "jit_above_cost", @@ -98,9 +125,11 @@ 
"jit_optimize_above_cost", "jit_profiling_support", "jit_tuple_deforming", + "lc_messages", "lc_monetary", "lc_numeric", "lc_time", + "lock_timeout", "log_btree_build_stats", "log_dispatch_stats", "log_duration", @@ -122,37 +151,53 @@ "memory_profiler_query_id", "memory_profiler_run_id", "optimize_bounded_sort", - "optimizer_cte_inlining_bound", - "optimizer_mdcache_size", - "optimizer_partition_selection_log", - "optimizer_plan_id", - "optimizer_push_group_by_below_setop_threshold", - "optimizer_samples_number", - "optimizer_xform_bind_threshold", - "parallel_setup_cost", - "parallel_tuple_cost", - "planner_work_mem", "pljava_classpath", "pljava_release_lingering_savepoints", "pljava_statement_cache_size", "pljava_vmoptions", + "readable_external_table_timeout", "row_security", "search_path", + "seed", + "stats_queue_level", "statement_mem", "statement_timeout", + "synchronize_seqscans", + "tcp_keepalives_count", + "tcp_keepalives_idle", + "tcp_keepalives_interval", + "tcp_user_timeout", "temp_buffers", "temp_tablespaces", "test_copy_qd_qe_split", "test_print_prefetch_joinqual", - "TimeZone", + "timezone_abbreviations", "trace_syncscan", "track_wal_io_timing", + "TimeZone", + "vacuum_cleanup_index_scale_factor", + "vacuum_cost_delay", + "vacuum_cost_limit", + "vacuum_cost_page_dirty", + "vacuum_cost_page_hit", + "vacuum_cost_page_miss", "vacuum_failsafe_age", + "vacuum_freeze_min_age", + "vacuum_freeze_table_age", "vacuum_multixact_failsafe_age", + "vacuum_multixact_freeze_min_age", + "vacuum_multixact_freeze_table_age", "verify_gpfdists_cert", "vmem_process_interrupt", "wal_debug", + "wal_sender_timeout", "work_mem", "gp_appendonly_insert_files", "gp_appendonly_insert_files_tuples_range", "gp_random_insert_segments", + "gp_resgroup_debug_wait_queue", + "gp_resgroup_memory_policy", + "gp_resource_group_bypass", + "gp_resource_group_enable_recalculate_query_mem", + "gp_write_shared_snapshot", + "zero_damaged_pages", diff --git a/src/include/utils/unsync_guc_name.h 
b/src/include/utils/unsync_guc_name.h index 1604d812d83..4eed0bdb014 100644 --- a/src/include/utils/unsync_guc_name.h +++ b/src/include/utils/unsync_guc_name.h @@ -9,15 +9,11 @@ *-------------------------------------------------------------------------- */ /* items in this file should be ordered */ - "allow_segment_DML", - "allow_system_table_mods", "enable_answer_query_using_materialized_views", - "application_name", "archive_cleanup_command", "archive_command", "archive_mode", "archive_timeout", - "array_nulls", "authentication_timeout", "autocommit", "autovacuum", @@ -34,8 +30,6 @@ "autovacuum_vacuum_scale_factor", "autovacuum_vacuum_threshold", "autovacuum_work_mem", - "backend_flush_after", - "backslash_quote", "bgwriter_delay", "bgwriter_flush_after", "bgwriter_lru_maxpages", @@ -66,21 +60,6 @@ "db_user_namespace", "deadlock_timeout", "debug_abort_after_distributed_prepared", - "Debug_appendonly_print_append_block", - "debug_appendonly_print_blockdirectory", - "debug_appendonly_print_compaction", - "debug_appendonly_print_datumstream", - "debug_appendonly_print_delete", - "debug_appendonly_print_insert", - "debug_appendonly_print_insert_tuple", - "Debug_appendonly_print_read_block", - "debug_appendonly_print_scan", - "debug_appendonly_print_scan_tuple", - "debug_appendonly_print_segfile_choice", - "debug_appendonly_print_storage_headers", - "debug_appendonly_print_verify_write_block", - "debug_appendonly_print_visimap", - "debug_appendonly_use_no_toast", "debug_assertions", "debug_basebackup", "debug_bitmap_print_insert", @@ -169,10 +148,6 @@ "geqo_threshold", "gp_adjust_selectivity_for_outerjoins", "gp_allow_non_uniform_partitioning_ddl", - "gp_appendonly_compaction", - "gp_appendonly_compaction_threshold", - "gp_appendonly_verify_block_checksums", - "gp_appendonly_verify_write_block", "gp_auth_time_override", "gp_autostats_allow_nonowner", "gp_autostats_mode", @@ -207,7 +182,6 @@ "gp_enable_groupext_distinct_gather", "gp_enable_groupext_distinct_pruning", 
"gp_enable_hashjoin_size_heuristic", - "gp_enable_interconnect_aggressive_retry", "gp_enable_minmax_optimization", "gp_enable_motion_deadlock_sanity", "gp_enable_multiphase_agg", @@ -245,7 +219,6 @@ "gp_gxid_prefetch_num", "gp_heap_require_relhasoids_match", "gp_instrument_shmem_size", - "gp_interconnect_cache_future_packets", "gp_is_writer", "gp_local_distributed_cache_stats", "gp_log_dynamic_partition_pruning", @@ -256,6 +229,7 @@ "gp_maintenance_conn", "gp_max_local_distributed_cache", "gp_max_plan_size", + "gp_max_slices", "gp_motion_cost_per_row", "gp_pause_on_restore_point_replay", "gp_predicate_pushdown_sample_rows", @@ -267,7 +241,6 @@ "gp_reject_internal_tcp_connection", "gp_reject_percent_threshold", "gp_reraise_signal", - "gp_resource_group_bypass", "gp_resource_group_cpu_limit", "gp_resource_group_cpu_priority", "gp_resource_group_enable_cgroup_version_two", @@ -299,23 +272,22 @@ "gp_statistics_pullup_from_child_partition", "gp_statistics_use_fkeys", "gp_subtrans_warn_limit", + "gp_vmem_idle_resource_timeout", "gp_use_legacy_hashops", "gp_vmem_limit_per_query", "gp_vmem_protect_limit", "gp_vmem_protect_segworker_cache_limit", "gp_workfile_limit_per_segment", "gp_workfile_max_entries", - "gp_write_shared_snapshot", "hba_file", "hot_standby", "hot_standby_feedback", "huge_pages", "huge_page_size", "ident_file", + "idle_in_transaction_session_timeout", "idle_session_timeout", - "ignore_checksum_failure", "ignore_invalid_pages", - "ignore_system_indexes", "integer_datetimes", "in_hot_standby", "is_superuser", @@ -325,10 +297,8 @@ "krb_server_keyfile", "lc_collate", "lc_ctype", - "lc_messages", "listen_addresses", "local_preload_libraries", - "lock_timeout", "lo_compat_privileges", "log_autostats", "log_autovacuum_min_duration", @@ -399,6 +369,7 @@ "optimizer_cost_model", "optimizer_cost_threshold", "optimizer_cte_inlining", + "optimizer_cte_inlining_bound", "optimizer_damping_factor_filter", "optimizer_damping_factor_groupby", 
"optimizer_damping_factor_join", @@ -460,13 +431,16 @@ "optimizer_join_order_threshold", "optimizer_log", "optimizer_log_failure", + "optimizer_mdcache_size", "optimizer_metadata_caching", "optimizer_minidump", "optimizer_multilevel_partitioning", "optimizer_nestloop_factor", "optimizer_parallel_union", + "optimizer_partition_selection_log", "optimizer_penalize_broadcast_threshold", "optimizer_penalize_skew", + "optimizer_plan_id", "optimizer_print_expression_properties", "optimizer_print_group_properties", "optimizer_print_job_scheduler", @@ -482,9 +456,11 @@ "optimizer_print_xform_results", "optimizer_prune_computed_columns", "optimizer_prune_unused_columns", + "optimizer_push_group_by_below_setop_threshold", "optimizer_push_requirements_from_consumer_to_producer", "optimizer_remove_order_below_dml", "optimizer_replicated_table_insert", + "optimizer_samples_number", "optimizer_sample_plans", "optimizer_search_strategy_path", "optimizer_segments", @@ -493,9 +469,13 @@ "optimizer_trace_fallback", "optimizer_use_external_constant_expression_evaluation_for_ints", "optimizer_use_gpdb_allocators", + "optimizer_xform_bind_threshold", "parallel_leader_participation", + "parallel_setup_cost", + "parallel_tuple_cost", "password_encryption", "plan_cache_mode", + "planner_work_mem", "pljava_classpath_insecure", "pljava_debug", "port", @@ -506,7 +486,6 @@ "promote_trigger_file", "quote_all_identifiers", "random_page_cost", - "readable_external_table_timeout", "recovery_end_command", "recovery_init_sync_method", "recovery_min_apply_delay", @@ -527,7 +506,6 @@ "restore_command", "role", "runaway_detector_activation_percent", - "seed", "segment_size", "seq_page_cost", "server_encoding", @@ -557,10 +535,8 @@ "ssl_prefer_server_ciphers", "ssl_renegotiation_limit", "standard_conforming_strings", - "stats_queue_level", "stats_temp_directory", "superuser_reserved_connections", - "synchronize_seqscans", "synchronous_commit", "synchronous_standby_names", "syslog_facility", @@ -573,14 
+549,9 @@ "task_log_statement", "task_use_background_worker", "task_timezone", - "tcp_keepalives_count", - "tcp_keepalives_idle", - "tcp_keepalives_interval", - "tcp_user_timeout", "temp_file_limit", "test_AppendOnlyHash_eviction_vs_just_marking_not_inuse", "test_print_direct_dispatch_info", - "timezone_abbreviations", "trace_lock_oidmin", "trace_locks", "trace_lock_table", @@ -603,17 +574,7 @@ "unix_socket_group", "unix_socket_permissions", "update_process_title", - "vacuum_cleanup_index_scale_factor", - "vacuum_cost_delay", - "vacuum_cost_limit", - "vacuum_cost_page_dirty", - "vacuum_cost_page_hit", - "vacuum_cost_page_miss", "vacuum_defer_cleanup_age", - "vacuum_freeze_min_age", - "vacuum_freeze_table_age", - "vacuum_multixact_freeze_min_age", - "vacuum_multixact_freeze_table_age", "wait_for_replication_threshold", "wal_block_size", "wal_buffers", @@ -629,7 +590,6 @@ "wal_recycle", "wal_retrieve_retry_interval", "wal_segment_size", - "wal_sender_timeout", "wal_skip_threshold", "wal_sync_method", "wal_writer_delay", @@ -639,7 +599,6 @@ "xid_warn_limit", "xmlbinary", "xmloption", - "zero_damaged_pages", "cluster_key_command", "file_encryption_method", "tde_force_switch", From 50f2611b105cffe4c91f7dbc8d7656b1f3d984e6 Mon Sep 17 00:00:00 2001 From: QingMa Date: Mon, 16 Jan 2023 10:23:41 +0800 Subject: [PATCH 44/46] Partially revert of fixes for greenplum-db#13948 (#14772) - Move guc `application_name` and `vacuum_cost_limit` back to `unsync_guc_name.h` to fix pipeline failure. Pipeline link: https://prod.ci.gpdb.pivotal.io/teams/main/pipelines/gpdb_main_without_asserts/jobs/gpconfig_rocky8/builds/19 - Remove several deprecated gucs. 
--- src/include/utils/sync_guc_name.h | 10 ++-------- src/include/utils/unsync_guc_name.h | 2 ++ 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/include/utils/sync_guc_name.h b/src/include/utils/sync_guc_name.h index 7c043533f7d..41358876b1a 100644 --- a/src/include/utils/sync_guc_name.h +++ b/src/include/utils/sync_guc_name.h @@ -11,7 +11,6 @@ /* items in this file should be ordered */ "allow_segment_DML", "allow_system_table_mods", - "application_name", "array_nulls", "backtrace_functions", "bytea_output", @@ -50,7 +49,6 @@ "gin_fuzzy_search_limit", "gin_pending_list_limit", "gp_allow_date_field_width_5digits", - "gp_allow_rename_relation_without_lock", "gp_appendonly_compaction", "gp_appendonly_compaction_threshold", "gp_appendonly_verify_block_checksums", @@ -96,6 +94,7 @@ "gp_max_packet_size", "gp_motion_slice_noop", "gp_resgroup_debug_wait_queue", + "gp_resource_group_bypass", "gp_resqueue_memory_policy_auto_fixed_mem", "gp_resqueue_print_operator_memory_limits", "gp_select_invisible", @@ -112,6 +111,7 @@ "gp_workfile_compression", "gp_workfile_limit_files_per_query", "gp_workfile_limit_per_query", + "gp_write_shared_snapshot", "hash_mem_multiplier", "ignore_system_indexes", "ignore_checksum_failure", @@ -177,7 +177,6 @@ "TimeZone", "vacuum_cleanup_index_scale_factor", "vacuum_cost_delay", - "vacuum_cost_limit", "vacuum_cost_page_dirty", "vacuum_cost_page_hit", "vacuum_cost_page_miss", @@ -195,9 +194,4 @@ "gp_appendonly_insert_files", "gp_appendonly_insert_files_tuples_range", "gp_random_insert_segments", - "gp_resgroup_debug_wait_queue", - "gp_resgroup_memory_policy", - "gp_resource_group_bypass", - "gp_resource_group_enable_recalculate_query_mem", - "gp_write_shared_snapshot", "zero_damaged_pages", diff --git a/src/include/utils/unsync_guc_name.h b/src/include/utils/unsync_guc_name.h index 4eed0bdb014..e7ef6f0ff87 100644 --- a/src/include/utils/unsync_guc_name.h +++ b/src/include/utils/unsync_guc_name.h @@ -10,6 +10,7 @@ */ /* items in 
this file should be ordered */ "enable_answer_query_using_materialized_views", + "application_name", "archive_cleanup_command", "archive_command", "archive_mode", @@ -574,6 +575,7 @@ "unix_socket_group", "unix_socket_permissions", "update_process_title", + "vacuum_cost_limit", "vacuum_defer_cleanup_age", "wait_for_replication_threshold", "wal_block_size", From a2a4657c9e97afb0947cccff763a7077c2615bd1 Mon Sep 17 00:00:00 2001 From: wenru yan Date: Tue, 7 Feb 2023 01:39:41 +0000 Subject: [PATCH 45/46] implement resource group memory limit control 1.support create/alter resource group with memory_limit, add the removed gucs(which are used to do memory limit) back; 2.support to acquire the amount of memory reserved for the query in resource group mode; 3.add a guc gp_resgroup_memory_query_fixed_mem to allow users set the memory limit for a query. --- src/backend/commands/resgroupcmds.c | 33 ++++++++-- src/backend/parser/gram.y | 12 ++-- src/backend/utils/misc/guc_gp.c | 43 +++++++++++++ src/backend/utils/resgroup/resgroup.c | 49 ++++++++++++++ src/backend/utils/resource_manager/memquota.c | 2 + .../utils/resource_manager/resource_manager.c | 4 ++ src/include/catalog/pg_resgroup.h | 1 + src/include/catalog/pg_resgroupcapability.dat | 3 + src/include/utils/resgroup.h | 7 ++ src/include/utils/sync_guc_name.h | 3 +- src/include/utils/unsync_guc_name.h | 3 + .../resgroup/resgroup_memory_limit.out | 64 +++++++++++++++++++ .../isolation2/isolation2_resgroup_schedule | 1 + .../sql/resgroup/resgroup_memory_limit.sql | 53 +++++++++++++++ 14 files changed, 268 insertions(+), 10 deletions(-) create mode 100644 src/test/isolation2/expected/resgroup/resgroup_memory_limit.out create mode 100644 src/test/isolation2/sql/resgroup/resgroup_memory_limit.sql diff --git a/src/backend/commands/resgroupcmds.c b/src/backend/commands/resgroupcmds.c index 18efac040b5..2f19fe7bd73 100644 --- a/src/backend/commands/resgroupcmds.c +++ b/src/backend/commands/resgroupcmds.c @@ -57,7 +57,7 @@ static 
int str2Int(const char *str, const char *prop); static ResGroupLimitType getResgroupOptionType(const char* defname); -static ResGroupCap getResgroupOptionValue(DefElem *defel, int type); +static ResGroupCap getResgroupOptionValue(DefElem *defel); static const char *getResgroupOptionName(ResGroupLimitType type); static void checkResgroupCapLimit(ResGroupLimitType type, ResGroupCap value); static void parseStmtOptions(CreateResourceGroupStmt *stmt, ResGroupCaps *caps); @@ -383,7 +383,7 @@ AlterResourceGroup(AlterResourceGroupStmt *stmt) } else { - value = getResgroupOptionValue(defel, limitType); + value = getResgroupOptionValue(defel); checkResgroupCapLimit(limitType, value); } @@ -438,6 +438,9 @@ AlterResourceGroup(AlterResourceGroupStmt *stmt) caps.cpuHardQuotaLimit = CPU_HARD_QUOTA_LIMIT_DISABLED; caps.cpuSoftPriority = RESGROUP_DEFAULT_CPU_SOFT_PRIORITY; break; + case RESGROUP_LIMIT_TYPE_MEMORY_LIMIT: + caps.memory_limit = value; + break; default: break; } @@ -577,6 +580,10 @@ GetResGroupCapabilities(Relation rel, Oid groupId, ResGroupCaps *resgroupCaps) case RESGROUP_LIMIT_TYPE_CPUSET: strlcpy(resgroupCaps->cpuset, value, sizeof(resgroupCaps->cpuset)); break; + case RESGROUP_LIMIT_TYPE_MEMORY_LIMIT: + resgroupCaps->memory_limit = str2Int(value, + getResgroupOptionName(type)); + break; default: break; } @@ -749,6 +756,8 @@ getResgroupOptionType(const char* defname) return RESGROUP_LIMIT_TYPE_CPUSET; else if (strcmp(defname, "cpu_soft_priority") == 0) return RESGROUP_LIMIT_TYPE_CPU_SHARES; + else if (strcmp(defname, "memory_limit") == 0) + return RESGROUP_LIMIT_TYPE_MEMORY_LIMIT; else return RESGROUP_LIMIT_TYPE_UNKNOWN; } @@ -757,7 +766,7 @@ getResgroupOptionType(const char* defname) * Get capability value from DefElem, convert from int64 to int */ static ResGroupCap -getResgroupOptionValue(DefElem *defel, int type) +getResgroupOptionValue(DefElem *defel) { int64 value; @@ -790,6 +799,8 @@ getResgroupOptionName(ResGroupLimitType type) return "cpuset"; case 
RESGROUP_LIMIT_TYPE_CPU_SHARES: return "cpu_soft_priority"; + case RESGROUP_LIMIT_TYPE_MEMORY_LIMIT: + return "memory_limit"; default: return "unknown"; } @@ -830,6 +841,9 @@ checkResgroupCapLimit(ResGroupLimitType type, int value) RESGROUP_MIN_CPU_SOFT_PRIORITY))); break; + case RESGROUP_LIMIT_TYPE_MEMORY_LIMIT: + break; + default: Assert(!"unexpected options"); break; @@ -874,11 +888,10 @@ parseStmtOptions(CreateResourceGroupStmt *stmt, ResGroupCaps *caps) checkCpuSetByRole(cpuset); caps->cpuHardQuotaLimit = CPU_HARD_QUOTA_LIMIT_DISABLED; caps->cpuSoftPriority = RESGROUP_DEFAULT_CPU_SOFT_PRIORITY; - } else { - value = getResgroupOptionValue(defel, type); + value = getResgroupOptionValue(defel); checkResgroupCapLimit(type, value); switch (type) @@ -893,6 +906,9 @@ parseStmtOptions(CreateResourceGroupStmt *stmt, ResGroupCaps *caps) case RESGROUP_LIMIT_TYPE_CPU_SHARES: caps->cpuSoftPriority = value; break; + case RESGROUP_LIMIT_TYPE_MEMORY_LIMIT: + caps->memory_limit = value; + break; default: break; } @@ -917,6 +933,9 @@ parseStmtOptions(CreateResourceGroupStmt *stmt, ResGroupCaps *caps) if (!(mask & (1 << RESGROUP_LIMIT_TYPE_CONCURRENCY))) caps->concurrency = RESGROUP_DEFAULT_CONCURRENCY; + if (!(mask & (1 << RESGROUP_LIMIT_TYPE_MEMORY_LIMIT))) + caps->memory_limit = -1; + if ((mask & (1 << RESGROUP_LIMIT_TYPE_CPU)) && !(mask & (1 << RESGROUP_LIMIT_TYPE_CPU_SHARES))) caps->cpuSoftPriority = RESGROUP_DEFAULT_CPU_SOFT_PRIORITY; @@ -1005,6 +1024,10 @@ insertResgroupCapabilities(Relation rel, Oid groupId, ResGroupCaps *caps) insertResgroupCapabilityEntry(rel, groupId, RESGROUP_LIMIT_TYPE_CPUSET, caps->cpuset); + + snprintf(value, sizeof(value), "%d", caps->memory_limit); + insertResgroupCapabilityEntry(rel, groupId, + RESGROUP_LIMIT_TYPE_MEMORY_LIMIT, value); } /* diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 7ebc37b1436..ac611cf55af 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -1730,14 +1730,18 @@ 
OptResourceGroupElem: { $$ = makeDefElem("cpu_hard_quota_limit", (Node *) makeInteger($2), @1); } - | CPU_SOFT_PRIORITY SignedIconst - { - $$ = makeDefElem("cpu_soft_priority", (Node *) makeInteger($2), @1); - } + | CPU_SOFT_PRIORITY SignedIconst + { + $$ = makeDefElem("cpu_soft_priority", (Node *) makeInteger($2), @1); + } | CPUSET Sconst { $$ = makeDefElem("cpuset", (Node *) makeString($2), @1); } + | MEMORY_LIMIT SignedIconst + { + $$ = makeDefElem("memory_limit", (Node *) makeInteger($2), @1); + } ; /***************************************************************************** diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index 9ea46f041a3..a39e3702766 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -1687,6 +1687,18 @@ struct config_bool ConfigureNamesBool_gp[] = NULL, NULL, NULL }, + { + + {"gp_log_resgroup_memory", PGC_USERSET, LOGGING_WHAT, + gettext_noop("Prints out messages related to resource group's memory management."), + NULL, + GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE + }, + &gp_log_resgroup_memory, + false, + NULL, NULL, NULL + }, + { {"gp_resqueue_print_operator_memory_limits", PGC_USERSET, LOGGING_WHAT, gettext_noop("Prints out the memory limit for operators (in explain) assigned by resource queue's " @@ -4037,6 +4049,17 @@ struct config_int ConfigureNamesInt_gp[] = NULL, NULL, NULL }, + { + {"gp_resgroup_memory_query_fixed_mem", PGC_USERSET, RESOURCES_MEM, + gettext_noop("Sets the fixed amount of memory reserved for a query."), + NULL, + GUC_UNIT_KB | GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE + }, + &gp_resgroup_memory_query_fixed_mem, + 0, 0, INT_MAX, + NULL, NULL, NULL + }, + { {"gp_resqueue_memory_policy_auto_fixed_mem", PGC_USERSET, RESOURCES_MEM, gettext_noop("Sets the fixed amount of memory reserved for non-memory intensive operators in the AUTO policy."), @@ -4047,6 +4070,17 @@ struct config_int ConfigureNamesInt_gp[] = 100, 50, INT_MAX, NULL, NULL, NULL }, + { + 
{"gp_resgroup_memory_policy_auto_fixed_mem", PGC_USERSET, RESOURCES_MEM, + gettext_noop("Sets the fixed amount of memory reserved for non-memory intensive operators in the AUTO policy."), + NULL, + GUC_UNIT_KB | GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE + }, + &gp_resgroup_memory_policy_auto_fixed_mem, + 100, 50, INT_MAX, + NULL, NULL, NULL + }, + { {"gp_global_deadlock_detector_period", PGC_SIGHUP, LOCK_MANAGEMENT, @@ -4934,6 +4968,15 @@ struct config_enum ConfigureNamesEnum_gp[] = NULL, NULL, NULL }, + { + {"gp_resgroup_memory_policy", PGC_SUSET, RESOURCES_MGM, + gettext_noop("Sets the policy for memory allocation of queries."), + gettext_noop("Valid values are AUTO, EAGER_FREE.") + }, + &gp_resgroup_memory_policy, + RESMANAGER_MEMORY_POLICY_EAGER_FREE, gp_resqueue_memory_policies, NULL, NULL + }, + { {"optimizer_join_order", PGC_USERSET, QUERY_TUNING_OTHER, gettext_noop("Set optimizer join heuristic model."), diff --git a/src/backend/utils/resgroup/resgroup.c b/src/backend/utils/resgroup/resgroup.c index 6a857d13662..638d307af94 100644 --- a/src/backend/utils/resgroup/resgroup.c +++ b/src/backend/utils/resgroup/resgroup.c @@ -62,6 +62,12 @@ /* * GUC variables. 
*/ +int gp_resgroup_memory_policy = RESMANAGER_MEMORY_POLICY_NONE; +bool gp_log_resgroup_memory = false; +int gp_resgroup_memory_query_fixed_mem; +int gp_resgroup_memory_policy_auto_fixed_mem; +bool gp_resgroup_print_operator_memory_limits = false; + bool gp_resgroup_debug_wait_queue = true; int gp_resource_group_queuing_timeout = 0; @@ -1420,6 +1426,8 @@ SerializeResGroupInfo(StringInfo str) appendBinaryStringInfo(str, (char *) &itmp, sizeof(int32)); itmp = htonl(caps->cpuSoftPriority); appendBinaryStringInfo(str, (char *) &itmp, sizeof(int32)); + itmp = htonl(caps->memory_limit); + appendBinaryStringInfo(str, (char *) &itmp, sizeof(int32)); cpuset_len = strlen(caps->cpuset); itmp = htonl(cpuset_len); @@ -1456,6 +1464,9 @@ DeserializeResGroupInfo(struct ResGroupCaps *capsOut, capsOut->cpuHardQuotaLimit = ntohl(itmp); memcpy(&itmp, ptr, sizeof(int32)); ptr += sizeof(int32); capsOut->cpuSoftPriority = ntohl(itmp); + memcpy(&itmp, ptr, sizeof(int32)); ptr += sizeof(int32); + capsOut->memory_limit = ntohl(itmp); + memcpy(&itmp, ptr, sizeof(int32)); ptr += sizeof(int32); cpuset_len = ntohl(itmp); @@ -3284,3 +3295,41 @@ ResGroupGetGroupIdBySessionId(int sessionId) return groupId; } + +/* + * In resource group mode, how much memory should a query take in bytes. 
+ */ +uint64 +ResourceGroupGetQueryMemoryLimit(void) +{ + ResGroupCaps *caps; + int64 resgLimit = -1; + uint64 queryMem = -1; + + Assert(Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_UTILITY); + + if (bypassedGroup) + return 0; + + if (gp_resgroup_memory_query_fixed_mem) + return (uint64) gp_resgroup_memory_query_fixed_mem * 1024L; + + Assert(selfIsAssigned()); + + LWLockAcquire(ResGroupLock, LW_SHARED); + + caps = &self->group->caps; + resgLimit = caps->memory_limit; + + AssertImply(resgLimit < 0, resgLimit == -1); + if (resgLimit == -1) + { + LWLockRelease(ResGroupLock); + return (uint64) statement_mem * 1024L; + } + + queryMem = (uint64)(resgLimit *1024L *1024L / caps->concurrency); + LWLockRelease(ResGroupLock); + + return queryMem; +} diff --git a/src/backend/utils/resource_manager/memquota.c b/src/backend/utils/resource_manager/memquota.c index 6856d06423e..1d9aa22137d 100644 --- a/src/backend/utils/resource_manager/memquota.c +++ b/src/backend/utils/resource_manager/memquota.c @@ -904,6 +904,8 @@ ResourceManagerGetQueryMemoryLimit(PlannedStmt* stmt) if (IsResQueueEnabled()) return ResourceQueueGetQueryMemoryLimit(stmt, ActivePortal->queueId); + else if (IsResGroupEnabled()) + return ResourceGroupGetQueryMemoryLimit(); /* RG FIXME: should we return statement_mem every time? */ return (uint64) statement_mem * 1024L; diff --git a/src/backend/utils/resource_manager/resource_manager.c b/src/backend/utils/resource_manager/resource_manager.c index 1cda79d406f..c52a58461c0 100644 --- a/src/backend/utils/resource_manager/resource_manager.c +++ b/src/backend/utils/resource_manager/resource_manager.c @@ -76,6 +76,10 @@ InitResManager(void) * checkpointer, ftsprobe and filerep processes. Wal sender acts like a backend, * so we also need to exclude it. 
*/ + gp_resmanager_memory_policy = (ResManagerMemoryPolicy *) &gp_resgroup_memory_policy; + gp_log_resmanager_memory = &gp_log_resgroup_memory; + gp_resmanager_memory_policy_auto_fixed_mem = &gp_resgroup_memory_policy_auto_fixed_mem; + gp_resmanager_print_operator_memory_limits = &gp_resgroup_print_operator_memory_limits; InitResGroups(); diff --git a/src/include/catalog/pg_resgroup.h b/src/include/catalog/pg_resgroup.h index 3625762112e..8e39e678515 100644 --- a/src/include/catalog/pg_resgroup.h +++ b/src/include/catalog/pg_resgroup.h @@ -55,6 +55,7 @@ typedef enum ResGroupLimitType RESGROUP_LIMIT_TYPE_CPU, /* cpu_hard_quota_limit */ RESGROUP_LIMIT_TYPE_CPU_SHARES, /* cpu_soft_priority */ RESGROUP_LIMIT_TYPE_CPUSET, /* cpuset */ + RESGROUP_LIMIT_TYPE_MEMORY_LIMIT, /* memory_limit */ RESGROUP_LIMIT_TYPE_COUNT, } ResGroupLimitType; diff --git a/src/include/catalog/pg_resgroupcapability.dat b/src/include/catalog/pg_resgroupcapability.dat index 514ad35801c..8b35b892f00 100644 --- a/src/include/catalog/pg_resgroupcapability.dat +++ b/src/include/catalog/pg_resgroupcapability.dat @@ -16,13 +16,16 @@ { resgroupid => '6437', reslimittype => '2', value => '20' }, { resgroupid => '6437', reslimittype => '3', value => '100' }, { resgroupid => '6437', reslimittype => '4', value => '-1' }, +{ resgroupid => '6437', reslimittype => '5', value => '-1' }, { resgroupid => '6438', reslimittype => '1', value => '10' }, { resgroupid => '6438', reslimittype => '2', value => '10' }, { resgroupid => '6438', reslimittype => '3', value => '100' }, { resgroupid => '6438', reslimittype => '4', value => '-1' }, +{ resgroupid => '6438', reslimittype => '5', value => '-1' }, { resgroupid => '6448', reslimittype => '1', value => '0' }, { resgroupid => '6448', reslimittype => '2', value => '10' }, { resgroupid => '6448', reslimittype => '3', value => '100' }, { resgroupid => '6448', reslimittype => '4', value => '-1' }, +{ resgroupid => '6448', reslimittype => '5', value => '-1' }, ] diff --git 
a/src/include/utils/resgroup.h b/src/include/utils/resgroup.h index 216fc29a6a1..9803bdfde23 100644 --- a/src/include/utils/resgroup.h +++ b/src/include/utils/resgroup.h @@ -69,6 +69,7 @@ typedef struct ResGroupCaps ResGroupCap concurrency; ResGroupCap cpuHardQuotaLimit; ResGroupCap cpuSoftPriority; + ResGroupCap memory_limit; char cpuset[MaxCpuSetLength]; } ResGroupCaps; @@ -81,6 +82,11 @@ typedef struct ResGroupCaps /* * GUC variables. */ +extern int gp_resgroup_memory_policy; +extern bool gp_log_resgroup_memory; +extern int gp_resgroup_memory_query_fixed_mem; +extern int gp_resgroup_memory_policy_auto_fixed_mem; +extern bool gp_resgroup_print_operator_memory_limits; extern bool gp_resgroup_debug_wait_queue; extern int gp_resource_group_cpu_priority; @@ -160,6 +166,7 @@ extern void ResGroupDropFinish(const ResourceGroupCallbackContext *callbackCtx, extern void ResGroupCreateOnAbort(const ResourceGroupCallbackContext *callbackCtx); extern void ResGroupAlterOnCommit(const ResourceGroupCallbackContext *callbackCtx); extern void ResGroupCheckForDrop(Oid groupId, char *name); +extern uint64 ResourceGroupGetQueryMemoryLimit(void); /* * Get resource group id of my proc. 
diff --git a/src/include/utils/sync_guc_name.h b/src/include/utils/sync_guc_name.h index 41358876b1a..ee65aa1434b 100644 --- a/src/include/utils/sync_guc_name.h +++ b/src/include/utils/sync_guc_name.h @@ -88,13 +88,14 @@ "gp_interconnect_type", "gp_log_endpoints", "gp_log_interconnect", + "gp_log_resgroup_memory", "gp_log_resqueue_memory", "gp_log_stack_trace_lines", "gp_log_suboverflow_statement", "gp_max_packet_size", "gp_motion_slice_noop", "gp_resgroup_debug_wait_queue", - "gp_resource_group_bypass", + "gp_resgroup_memory_policy_auto_fixed_mem", "gp_resqueue_memory_policy_auto_fixed_mem", "gp_resqueue_print_operator_memory_limits", "gp_select_invisible", diff --git a/src/include/utils/unsync_guc_name.h b/src/include/utils/unsync_guc_name.h index e7ef6f0ff87..14db7340bf4 100644 --- a/src/include/utils/unsync_guc_name.h +++ b/src/include/utils/unsync_guc_name.h @@ -242,6 +242,9 @@ "gp_reject_internal_tcp_connection", "gp_reject_percent_threshold", "gp_reraise_signal", + "gp_resgroup_memory_policy", + "gp_resgroup_memory_query_fixed_mem", + "gp_resource_group_bypass", "gp_resource_group_cpu_limit", "gp_resource_group_cpu_priority", "gp_resource_group_enable_cgroup_version_two", diff --git a/src/test/isolation2/expected/resgroup/resgroup_memory_limit.out b/src/test/isolation2/expected/resgroup/resgroup_memory_limit.out new file mode 100644 index 00000000000..43cd09e9199 --- /dev/null +++ b/src/test/isolation2/expected/resgroup/resgroup_memory_limit.out @@ -0,0 +1,64 @@ +-- test memory limit +-- start_ignore +DROP ROLE IF EXISTS role_memory_test; +DROP +DROP RESOURCE GROUP rg_memory_test; +ERROR: resource group "rg_memory_test" does not exist +DROP TABLE t_memory_limit; +ERROR: table "t_memory_limit" does not exist +-- end_ignore + +-- create a pl function to show the memory used by a process +CREATE OR REPLACE FUNCTION func_memory_test (text) RETURNS text as /*in func*/ $$ /*in func*/ DECLARE /*in func*/ ln text; /*in func*/ tmp text[]; /*in func*/ match bool := 
false; /*in func*/ BEGIN /*in func*/ FOR ln IN execute format('explain analyze %s', $1) LOOP /*in func*/ IF NOT match THEN /*in func*/ tmp := regexp_match(ln, 'Memory used: (.*)'); /*in func*/ IF tmp IS NOT null THEN /*in func*/ match := true; /*in func*/ END IF; /*in func*/ END IF; /*in func*/ END LOOP; /*in func*/ RETURN tmp[1]; /*in func*/ END; /*in func*/ $$ /*in func*/ LANGUAGE plpgsql; +CREATE + +-- create a resource group with memory limit 100 Mb +CREATE RESOURCE GROUP rg_memory_test WITH(memory_limit=100, cpu_hard_quota_limit=20, concurrency=2); +CREATE +CREATE ROLE role_memory_test RESOURCE GROUP rg_memory_test; +CREATE + +-- session1: explain memory used by query +1: SET ROLE TO role_memory_test; +SET +1: CREATE TABLE t_memory_limit(a int); +CREATE +1: BEGIN; +BEGIN +1: SELECT func_memory_test('SELECT * FROM t_memory_limit'); + func_memory_test +------------------ + 51200kB +(1 row) + +-- session2: test alter resource group's memory limit +2:ALTER RESOURCE GROUP rg_memory_test SET memory_limit 200; +ALTER + +-- memory used will grow up to 100 Mb +1: SELECT func_memory_test('SELECT * FROM t_memory_limit'); + func_memory_test +------------------ + 102400kB +(1 row) +1: END; +END +-- set gp_resgroup_memory_query_fixed_mem to 200MB +1: SET gp_resgroup_memory_query_fixed_mem to 204800; +SET +1: SELECT func_memory_test('SELECT * FROM t_memory_limit'); + func_memory_test +------------------ + 204800kB +(1 row) +1: RESET ROLE; +RESET +-- clean +DROP FUNCTION func_memory_test(text); +DROP +DROP TABLE t_memory_limit; +DROP +DROP ROLE IF EXISTS role_memory_test; +DROP +DROP RESOURCE GROUP rg_memory_test; +DROP diff --git a/src/test/isolation2/isolation2_resgroup_schedule b/src/test/isolation2/isolation2_resgroup_schedule index f2d315d1361..737f46b1637 100644 --- a/src/test/isolation2/isolation2_resgroup_schedule +++ b/src/test/isolation2/isolation2_resgroup_schedule @@ -20,6 +20,7 @@ test: resgroup/resgroup_cpu_rate_limit test: resgroup/resgroup_cpuset test: 
resgroup/resgroup_cpuset_empty_default test: resgroup/resgroup_cancel_terminate_concurrency +test: resgroup/resgroup_memory_limit test: resgroup/resgroup_move_query # regression tests diff --git a/src/test/isolation2/sql/resgroup/resgroup_memory_limit.sql b/src/test/isolation2/sql/resgroup/resgroup_memory_limit.sql new file mode 100644 index 00000000000..3acdab80214 --- /dev/null +++ b/src/test/isolation2/sql/resgroup/resgroup_memory_limit.sql @@ -0,0 +1,53 @@ +-- test memory limit +-- start_ignore +DROP ROLE IF EXISTS role_memory_test; +DROP RESOURCE GROUP rg_memory_test; +DROP TABLE t_memory_limit; +-- end_ignore + +-- create a pl function to show the memory used by a process +CREATE OR REPLACE FUNCTION func_memory_test (text) RETURNS text as /*in func*/ +$$ /*in func*/ +DECLARE /*in func*/ + ln text; /*in func*/ + tmp text[]; /*in func*/ + match bool := false; /*in func*/ +BEGIN /*in func*/ + FOR ln IN execute format('explain analyze %s', $1) LOOP /*in func*/ + IF NOT match THEN /*in func*/ + tmp := regexp_match(ln, 'Memory used: (.*)'); /*in func*/ + IF tmp IS NOT null THEN /*in func*/ + match := true; /*in func*/ + END IF; /*in func*/ + END IF; /*in func*/ + END LOOP; /*in func*/ + RETURN tmp[1]; /*in func*/ +END; /*in func*/ +$$ /*in func*/ +LANGUAGE plpgsql; + +-- create a resource group with memory limit 100 Mb +CREATE RESOURCE GROUP rg_memory_test WITH(memory_limit=100, cpu_hard_quota_limit=20, concurrency=2); +CREATE ROLE role_memory_test RESOURCE GROUP rg_memory_test; + +-- session1: explain memory used by query +1: SET ROLE TO role_memory_test; +1: CREATE TABLE t_memory_limit(a int); +1: BEGIN; +1: SELECT func_memory_test('SELECT * FROM t_memory_limit'); + +-- session2: test alter resource group's memory limit +2:ALTER RESOURCE GROUP rg_memory_test SET memory_limit 200; + +-- memory used will grow up to 100 Mb +1: SELECT func_memory_test('SELECT * FROM t_memory_limit'); +1: END; +-- set gp_resgroup_memory_query_fixed_mem to 200MB +1: SET 
gp_resgroup_memory_query_fixed_mem to 204800; +1: SELECT func_memory_test('SELECT * FROM t_memory_limit'); +1: RESET ROLE; +-- clean +DROP FUNCTION func_memory_test(text); +DROP TABLE t_memory_limit; +DROP ROLE IF EXISTS role_memory_test; +DROP RESOURCE GROUP rg_memory_test; From fe58f698a9860955c77120a1fb40235f39bc2148 Mon Sep 17 00:00:00 2001 From: Zijie Date: Tue, 18 Jun 2024 09:45:58 +0800 Subject: [PATCH 46/46] Fix merge GPDB (resgroup related) --- src/backend/cdb/cdbutil.c | 4 +- src/backend/cdb/dispatcher/cdbgang.c | 12 +-- src/bin/gpfts/ftsprobe.c | 55 +++++++++--- src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_proc.dat | 2 +- .../isolation2/expected/fts_segment_reset.out | 3 +- src/test/recovery/t/018_wal_optimize.pl | 39 ++++---- src/test/recovery/t/021_row_visibility.pl | 88 ++++++++++--------- src/test/recovery/t/023_pitr_prepared_xact.pl | 6 ++ .../regress/expected/gp_prepared_xacts.out | 2 +- src/test/regress/input/dispatch.source | 4 - src/test/regress/output/dispatch.source | 22 ----- src/test/singlenode_regress/expected/temp.out | 22 ++--- 13 files changed, 139 insertions(+), 122 deletions(-) diff --git a/src/backend/cdb/cdbutil.c b/src/backend/cdb/cdbutil.c index 152e04be6bb..5af92fb69f1 100644 --- a/src/backend/cdb/cdbutil.c +++ b/src/backend/cdb/cdbutil.c @@ -1965,7 +1965,7 @@ gp_get_suboverflowed_backends(PG_FUNCTION_ARGS) LWLockAcquire(ProcArrayLock, LW_SHARED); for (i = 0; i < ProcGlobal->allProcCount; i++) { - if (ProcGlobal->allPgXact[i].overflowed) + if (ProcGlobal->subxidStates[i].overflowed) astate = accumArrayResult(astate, Int32GetDatum(ProcGlobal->allProcs[i].pid), false, INT4OID, CurrentMemoryContext); @@ -4181,7 +4181,7 @@ gp_get_suboverflowed_backends(PG_FUNCTION_ARGS) LWLockAcquire(ProcArrayLock, LW_SHARED); for (i = 0; i < ProcGlobal->allProcCount; i++) { - if (ProcGlobal->allPgXact[i].overflowed) + if (ProcGlobal->subxidStates[i].overflowed) astate = accumArrayResult(astate, 
Int32GetDatum(ProcGlobal->allProcs[i].pid), false, INT4OID, CurrentMemoryContext); diff --git a/src/backend/cdb/dispatcher/cdbgang.c b/src/backend/cdb/dispatcher/cdbgang.c index f6149958b0c..f838ad5fadd 100644 --- a/src/backend/cdb/dispatcher/cdbgang.c +++ b/src/backend/cdb/dispatcher/cdbgang.c @@ -972,13 +972,13 @@ backend_type(SegmentDatabaseDescriptor *segdb) } /* - * qsort comparator for SegmentDatabaseDescriptors. Sorts by descriptor ID. + * sort comparator for SegmentDatabaseDescriptors. Sorts by descriptor ID. */ static int -compare_segdb_id(const void *v1, const void *v2) +compare_segdb_id(const ListCell *a, const ListCell *b) { - SegmentDatabaseDescriptor *d1 = (SegmentDatabaseDescriptor *) lfirst(*(ListCell **) v1); - SegmentDatabaseDescriptor *d2 = (SegmentDatabaseDescriptor *) lfirst(*(ListCell **) v2); + SegmentDatabaseDescriptor *d1 = (SegmentDatabaseDescriptor *) lfirst(a); + SegmentDatabaseDescriptor *d2 = (SegmentDatabaseDescriptor *) lfirst(b); return d1->identifier - d2->identifier; } @@ -1060,7 +1060,7 @@ gp_backend_info(PG_FUNCTION_ARGS) * For a slightly better default user experience, sort by descriptor ID. * Users may of course specify their own ORDER BY if they don't like it. */ - user_fctx->segdbs = list_qsort(user_fctx->segdbs, compare_segdb_id); + list_sort(user_fctx->segdbs, compare_segdb_id); user_fctx->curpos = list_head(user_fctx->segdbs); /* Create a descriptor for the records we'll be returning. */ @@ -1095,7 +1095,7 @@ gp_backend_info(PG_FUNCTION_ARGS) /* Get the next descriptor. */ dbdesc = lfirst(user_fctx->curpos); - user_fctx->curpos = lnext(user_fctx->curpos); + user_fctx->curpos = lnext(user_fctx->segdbs, user_fctx->curpos); /* Fill in the row attributes. 
*/ dbinfo = dbdesc->segment_database_info; diff --git a/src/bin/gpfts/ftsprobe.c b/src/bin/gpfts/ftsprobe.c index ee0d8b746b4..8b110ba79a0 100644 --- a/src/bin/gpfts/ftsprobe.c +++ b/src/bin/gpfts/ftsprobe.c @@ -32,6 +32,7 @@ #include /* for MAXHOSTNAMELEN */ #include "fts_etcd.h" #include +#include "postmaster/postmaster.h" static struct pollfd *PollFds; @@ -284,8 +285,15 @@ checkIfConnFailedDueToRecoveryInProgress(PGconn *conn) && strstr(PQerrorMessage(conn),POSTMASTER_AFTER_PROMOTE_STANDBY_IN_RECOVERY_DETAIL_MSG); } +/* + * Check if the primary segment is restarting normally by examing the PQ error message. + * It could be that they are in RESET (waiting for the children to exit) or making + * progress in RECOVERY. Note there is no good source of RESET progress indications + * that we could check, so we simply always allow it. Normally RESET should be fast + * and there's a timeout in postmaster to guard against long wait. + */ static void -checkIfFailedDueToRecoveryInProgress(fts_segment_info *fts_info) +checkIfFailedDueToNormalRestart(fts_segment_info *fts_info) { if (strstr(PQerrorMessage(fts_info->conn), _(POSTMASTER_IN_RECOVERY_MSG)) || strstr(PQerrorMessage(fts_info->conn), _(POSTMASTER_IN_STARTUP_MSG))) @@ -322,6 +330,7 @@ checkIfFailedDueToRecoveryInProgress(fts_segment_info *fts_info) */ if (tmpptr <= fts_info->xlogrecptr) { + fts_info->restart_state = PM_IN_RECOVERY_NOT_MAKING_PROGRESS; cbdb_log_debug("detected segment is in recovery mode and not making progress (content=%d) " "primary dbid=%d, mirror dbid=%d", PRIMARY_CONFIG(fts_info)->segindex, @@ -330,7 +339,7 @@ checkIfFailedDueToRecoveryInProgress(fts_segment_info *fts_info) } else { - fts_info->recovery_making_progress = true; + fts_info->restart_state = PM_IN_RECOVERY_MAKING_PROGRESS; fts_info->xlogrecptr = tmpptr; cbdb_log_debug("detected segment is in recovery mode replayed (%X/%X) (content=%d) " @@ -342,6 +351,15 @@ checkIfFailedDueToRecoveryInProgress(fts_segment_info *fts_info) 
fts_info->has_mirror_configured ? MIRROR_CONFIG(fts_info)->dbid : -1); } } + else if (strstr(PQerrorMessage(fts_info->conn), _(POSTMASTER_IN_RESET_MSG))) + { + fts_info->restart_state = PM_IN_RESETTING; + cbdb_log_debug("FTS: detected segment is in RESET state (content=%d) " + "primary dbid=%d, mirror dbid=%d", + fts_info->primary_cdbinfo->config->segindex, + fts_info->primary_cdbinfo->config->dbid, + fts_info->mirror_cdbinfo->config->dbid); + } } /* @@ -376,10 +394,11 @@ ftsConnect(fts_context *context) case FTS_SYNCREP_OFF_SEGMENT: case FTS_PROMOTE_SEGMENT: /* - * We always default to false. If connect fails due to recovery in progress - * this variable will be set based on LSN value in error message. + * We always default to PM_NOT_IN_RESTART. If connect fails, we then check + * the primary's restarting state, so we can skip promoting mirror if it's in + * PM_IN_RESETTING or PM_IN_RECOVERY_MAKING_PROGRESS. */ - fts_info->recovery_making_progress = false; + fts_info->restart_state = PM_NOT_IN_RESTART; if (fts_info->conn == NULL) { Assert(fts_info->retry_count <= context->config->probe_retries); @@ -426,7 +445,7 @@ ftsConnect(fts_context *context) case PGRES_POLLING_FAILED: fts_info->state = nextFailedState(fts_info->state); - checkIfFailedDueToRecoveryInProgress(fts_info); + checkIfFailedDueToNormalRestart(fts_info); cbdb_log_debug("cannot establish libpq connection " "(content=%d, dbid=%d): %s, retry_count=%d", PRIMARY_CONFIG(fts_info)->segindex, @@ -1266,11 +1285,21 @@ processResponse(fts_context *context) } break; case FTS_PROBE_FAILED: - /* - * Primary is down - * If primary is in recovery, do not mark it down and promote mirror - */ - if (fts_info->recovery_making_progress) + /* Primary is down */ + + /* If primary is in resetting or making progress in recovery, do not mark it down and promote mirror */ + if (fts_info->restart_state == PM_IN_RESETTING) + { + Assert(strstr(PQerrorMessage(fts_info->conn), _(POSTMASTER_IN_RESET_MSG))); + cbdb_log_debug( + "FTS: 
detected segment is in resetting mode " + "(content=%d) primary dbid=%d, mirror dbid=%d", + primary->config->segindex, primary->config->dbid, mirror->config->dbid); + + fts_info->state = FTS_RESPONSE_PROCESSED; + break; + } + else if (fts_info->restart_state == PM_IN_RECOVERY_MAKING_PROGRESS) { assert(strstr(PQerrorMessage(fts_info->conn), _(POSTMASTER_IN_RECOVERY_MSG)) || strstr(PQerrorMessage(fts_info->conn), _(POSTMASTER_IN_STARTUP_MSG))); @@ -1467,7 +1496,7 @@ FtsWalRepInitProbeContext(CdbComponentDatabases *cdbs, fts_context *context, fts fts_info->state = FTS_PROBE_FAILED; else fts_info->state = FTS_PROBE_SEGMENT; - fts_info->recovery_making_progress = false; + fts_info->restart_state = PM_NOT_IN_RESTART; fts_info->xlogrecptr = InvalidXLogRecPtr; fts_info->primary_cdbinfo = primary; @@ -1495,7 +1524,7 @@ FtsWalRepInitProbeContext(CdbComponentDatabases *cdbs, fts_context *context, fts fts_info->state = FTS_PROBE_FAILED; else fts_info->state = FTS_PROBE_SEGMENT; - fts_info->recovery_making_progress = false; + fts_info->restart_state = PM_NOT_IN_RESTART; fts_info->xlogrecptr = InvalidXLogRecPtr; fts_info->primary_cdbinfo = master; diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 1acb192ff70..05f541da185 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -56,6 +56,6 @@ */ /* 3yyymmddN */ -#define CATALOG_VERSION_NO 302301101 +#define CATALOG_VERSION_NO 302406171 #endif diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 19110f7f0ad..b36f4c88388 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -11963,7 +11963,7 @@ { oid => 7182, descr => 'wait until all endpoint of this parallel retrieve cursor has been retrieved finished', proname => 'gp_wait_parallel_retrieve_cursor', provolatile => 'v', proparallel => 'u', prorettype => 'bool', proargtypes => 'text int4', proallargtypes => '{text,int4,bool}', proargmodes => '{i,i,o}', 
proargnames => '{cursorname,timeout_sec,finished}', prosrc => 'gp_wait_parallel_retrieve_cursor', proexeclocation => 'c' }, -{ oid => 7183, descr => 'debugging information for segment backends', +{ oid => 7146, descr => 'debugging information for segment backends', proname => 'gp_backend_info', prorettype => 'record', prorows => '1', proretset => 't', proargtypes => '', proallargtypes => '{int4,char,int4,text,int4,int4}', prosrc => 'gp_backend_info', pronargs => 6, proargnames => '{id,type,content,host,port,pid}', proargmodes => '{o,o,o,o,o,o}', proexeclocation => 'c'} diff --git a/src/test/isolation2/expected/fts_segment_reset.out b/src/test/isolation2/expected/fts_segment_reset.out index 8d575ccfddc..47cd2fa8294 100644 --- a/src/test/isolation2/expected/fts_segment_reset.out +++ b/src/test/isolation2/expected/fts_segment_reset.out @@ -53,8 +53,7 @@ CREATE 1<: <... completed> ERROR: fault triggered, fault name:'start_prepare' fault type:'panic' 2<: <... completed> -DETAIL: Segments are in reset/recovery mode. -ERROR: failed to acquire resources on one or more segments +CREATE -- We shouldn't see failover to mirror select gp_request_fts_probe_scan(); diff --git a/src/test/recovery/t/018_wal_optimize.pl b/src/test/recovery/t/018_wal_optimize.pl index 08ab5d5611c..6af2abcfa61 100644 --- a/src/test/recovery/t/018_wal_optimize.pl +++ b/src/test/recovery/t/018_wal_optimize.pl @@ -15,7 +15,11 @@ use PostgresNode; use TestLib; -use Test::More tests => 38; + +# GPDB: Effectively disable some of these tests. We cannot run +# PREPARE TRANSACTION in utility-mode. +# use Test::More tests => 38; +use Test::More tests => 36; sub check_orphan_relfilenodes { @@ -115,22 +119,23 @@ sub run_wal_optimize "SELECT count(*), min(id) FROM trunc_ins;"); is($result, qq(1|2), "wal_level = $wal_level, TRUNCATE INSERT"); - # Same for prepared transaction. - # Tuples inserted after the truncation should be seen. 
- $node->safe_psql( - 'postgres', " - BEGIN; - CREATE TABLE twophase (id serial PRIMARY KEY); - INSERT INTO twophase VALUES (DEFAULT); - TRUNCATE twophase; - INSERT INTO twophase VALUES (DEFAULT); - PREPARE TRANSACTION 't'; - COMMIT PREPARED 't';"); - $node->stop('immediate'); - $node->start; - $result = $node->safe_psql('postgres', - "SELECT count(*), min(id) FROM trunc_ins;"); - is($result, qq(1|2), "wal_level = $wal_level, TRUNCATE INSERT PREPARE"); + # GPDB: Disable this test. + # # Same for prepared transaction. + # # Tuples inserted after the truncation should be seen. + # $node->safe_psql( + # 'postgres', " + # BEGIN; + # CREATE TABLE twophase (id serial PRIMARY KEY); + # INSERT INTO twophase VALUES (DEFAULT); + # TRUNCATE twophase; + # INSERT INTO twophase VALUES (DEFAULT); + # PREPARE TRANSACTION 't'; + # COMMIT PREPARED 't';"); + # $node->stop('immediate'); + # $node->start; + # $result = $node->safe_psql('postgres', + # "SELECT count(*), min(id) FROM trunc_ins;"); + # is($result, qq(1|2), "wal_level = $wal_level, TRUNCATE INSERT PREPARE"); # Writing WAL at end of xact, instead of syncing. $node->safe_psql( diff --git a/src/test/recovery/t/021_row_visibility.pl b/src/test/recovery/t/021_row_visibility.pl index 8a554c5aae6..4355f0b3dcf 100644 --- a/src/test/recovery/t/021_row_visibility.pl +++ b/src/test/recovery/t/021_row_visibility.pl @@ -8,7 +8,10 @@ use PostgresNode; use TestLib; -use Test::More tests => 10; +# GPDB: Effectively disable some of these tests. We cannot run +# PREPARE TRANSACTION in utility-mode. +# use Test::More tests => 10; +use Test::More tests => 6; use Config; # Initialize primary node @@ -118,47 +121,48 @@ qr/first update\n\(1 row\)$/m), 'committed update visible'); -# -# 5. 
Check that changes in prepared xacts is invisible -# -ok( send_query_and_wait( - \%psql_primary, q[ -DELETE from test_visibility; -- delete old data, so we start with clean slate -BEGIN; -INSERT INTO test_visibility VALUES('inserted in prepared will_commit'); -PREPARE TRANSACTION 'will_commit';], - qr/^PREPARE TRANSACTION$/m), - 'prepared will_commit'); - -ok( send_query_and_wait( - \%psql_primary, q[ -BEGIN; -INSERT INTO test_visibility VALUES('inserted in prepared will_abort'); -PREPARE TRANSACTION 'will_abort'; - ], - qr/^PREPARE TRANSACTION$/m), - 'prepared will_abort'); - -$node_primary->wait_for_catchup($node_standby, 'replay', - $node_primary->lsn('insert')); - -ok( send_query_and_wait( - \%psql_standby, - q[SELECT * FROM test_visibility ORDER BY data;], - qr/^\(0 rows\)$/m), - 'uncommitted prepared invisible'); - -# For some variation, finish prepared xacts via separate connections -$node_primary->safe_psql('postgres', "COMMIT PREPARED 'will_commit';"); -$node_primary->safe_psql('postgres', "ROLLBACK PREPARED 'will_abort';"); -$node_primary->wait_for_catchup($node_standby, 'replay', - $node_primary->lsn('insert')); - -ok( send_query_and_wait( - \%psql_standby, - q[SELECT * FROM test_visibility ORDER BY data;], - qr/will_commit.*\n\(1 row\)$/m), - 'finished prepared visible'); +# GPDB: Disable this test. +# # +# # 5. 
Check that changes in prepared xacts is invisible +# # +# ok( send_query_and_wait( +# \%psql_primary, q[ +# DELETE from test_visibility; -- delete old data, so we start with clean slate +# BEGIN; +# INSERT INTO test_visibility VALUES('inserted in prepared will_commit'); +# PREPARE TRANSACTION 'will_commit';], +# qr/^PREPARE TRANSACTION$/m), +# 'prepared will_commit'); + +# ok( send_query_and_wait( +# \%psql_primary, q[ +# BEGIN; +# INSERT INTO test_visibility VALUES('inserted in prepared will_abort'); +# PREPARE TRANSACTION 'will_abort'; +# ], +# qr/^PREPARE TRANSACTION$/m), +# 'prepared will_abort'); + +# $node_primary->wait_for_catchup($node_standby, 'replay', +# $node_primary->lsn('insert')); + +# ok( send_query_and_wait( +# \%psql_standby, +# q[SELECT * FROM test_visibility ORDER BY data;], +# qr/^\(0 rows\)$/m), +# 'uncommitted prepared invisible'); + +# # For some variation, finish prepared xacts via separate connections +# $node_primary->safe_psql('postgres', "COMMIT PREPARED 'will_commit';"); +# $node_primary->safe_psql('postgres', "ROLLBACK PREPARED 'will_abort';"); +# $node_primary->wait_for_catchup($node_standby, 'replay', +# $node_primary->lsn('insert')); + +# ok( send_query_and_wait( +# \%psql_standby, +# q[SELECT * FROM test_visibility ORDER BY data;], +# qr/will_commit.*\n\(1 row\)$/m), +# 'finished prepared visible'); # explicitly shut down psql instances gracefully - to avoid hangs # or worse on windows diff --git a/src/test/recovery/t/023_pitr_prepared_xact.pl b/src/test/recovery/t/023_pitr_prepared_xact.pl index 9190a38f93c..d27b469e249 100644 --- a/src/test/recovery/t/023_pitr_prepared_xact.pl +++ b/src/test/recovery/t/023_pitr_prepared_xact.pl @@ -6,7 +6,13 @@ use warnings; use PostgresNode; use TestLib; + +# GPDB: Effectively disable this TAP test. We cannot run PREPARE +# TRANSACTION in utility-mode. 
use Test::More tests => 1; +is(-1, -1, "Disable this TAP test"); +exit; + use File::Compare; # Initialize and start primary node with WAL archiving diff --git a/src/test/regress/expected/gp_prepared_xacts.out b/src/test/regress/expected/gp_prepared_xacts.out index b3a1b96bc46..217f941f0f4 100644 --- a/src/test/regress/expected/gp_prepared_xacts.out +++ b/src/test/regress/expected/gp_prepared_xacts.out @@ -1,7 +1,7 @@ -- PREPARE TRANSACTION should not work BEGIN; PREPARE TRANSACTION 'foo_prep_xact'; -ERROR: PREPARE TRANSACTION is not yet supported in Greenplum Database +ERROR: PREPARE TRANSACTION is not yet supported in Cloudberry Database -- PREPARE TRANSACTION should not work in utility-mode connections either \! PGOPTIONS='-c gp_role=utility' psql -X regression -c "BEGIN; PREPARE TRANSACTION 'foo_prep_xact';" ERROR: PREPARE TRANSACTION is not supported in utility mode diff --git a/src/test/regress/input/dispatch.source b/src/test/regress/input/dispatch.source index d058b8ea619..4068dc944e1 100644 --- a/src/test/regress/input/dispatch.source +++ b/src/test/regress/input/dispatch.source @@ -575,10 +575,6 @@ set optimizer=off; --gang reused create table t_create_gang_time(tc1 int,tc2 int); ---1-gang reused -select * from t_create_gang_time where tc1=1; -explain analyze select * from t_create_gang_time where tc1=1; - --n-gang reused and 1-gang is created. select * from t_create_gang_time t1, t_create_gang_time t2 where t1.tc1=2; diff --git a/src/test/regress/output/dispatch.source b/src/test/regress/output/dispatch.source index 4c94628f706..cd8d872d868 100644 --- a/src/test/regress/output/dispatch.source +++ b/src/test/regress/output/dispatch.source @@ -933,28 +933,6 @@ create table t_create_gang_time(tc1 int,tc2 int); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'tc1' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. 
Make sure column(s) chosen are the optimal data distribution key to minimize skew. INFO: (Gang) is reused ---1-gang reused -select * from t_create_gang_time where tc1=1; -INFO: (Slice1) is reused - tc1 | tc2 ------+----- -(0 rows) - -explain analyze select * from t_create_gang_time where tc1=1; -INFO: (Slice1) is reused - QUERY PLAN -------------------------------------------------------------------------------------------------------------------------- - Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..393.90 rows=86 width=8) (actual time=0.611..0.618 rows=0 loops=1) - -> Seq Scan on t_create_gang_time (cost=0.00..392.75 rows=29 width=8) (never executed) - Filter: (tc1 = 1) - Optimizer: Postgres query optimizer - Planning Time: 0.187 ms - (slice0) Executor memory: 36K bytes. - (slice1) Executor memory: 36K bytes (seg1). - Memory used: 128000kB - Execution Time: 1.249 ms -(9 rows) - --n-gang reused and 1-gang is created. select * from t_create_gang_time t1, t_create_gang_time t2 where t1.tc1=2; INFO: (Slice1) is reused diff --git a/src/test/singlenode_regress/expected/temp.out b/src/test/singlenode_regress/expected/temp.out index af76a4564d1..69b4ebfe116 100644 --- a/src/test/singlenode_regress/expected/temp.out +++ b/src/test/singlenode_regress/expected/temp.out @@ -328,7 +328,7 @@ begin; create function pg_temp.twophase_func() returns void as $$ select '2pc_func'::text $$ language sql; prepare transaction 'twophase_func'; -ERROR: PREPARE TRANSACTION is not yet supported in Cloudberry Database +ERROR: PREPARE TRANSACTION is not supported in utility mode rollback; -- PREPARE TRANACTION is not supported in GPDB -- Function drop create function pg_temp.twophase_func() returns void as @@ -336,29 +336,29 @@ create function pg_temp.twophase_func() returns void as begin; drop function pg_temp.twophase_func(); prepare transaction 'twophase_func'; -ERROR: PREPARE TRANSACTION is not yet supported in Cloudberry Database +ERROR: PREPARE TRANSACTION is not supported in 
utility mode rollback; -- PREPARE TRANACTION is not supported in GPDB -- Operator creation begin; create operator pg_temp.@@ (leftarg = int4, rightarg = int4, procedure = int4mi); prepare transaction 'twophase_operator'; -ERROR: PREPARE TRANSACTION is not yet supported in Cloudberry Database +ERROR: PREPARE TRANSACTION is not supported in utility mode rollback; -- PREPARE TRANACTION is not supported in GPDB -- These generate errors about temporary tables. begin; create type pg_temp.twophase_type as (a int); prepare transaction 'twophase_type'; -ERROR: PREPARE TRANSACTION is not yet supported in Cloudberry Database +ERROR: PREPARE TRANSACTION is not supported in utility mode rollback; -- PREPARE TRANACTION is not supported in GPDB begin; create view pg_temp.twophase_view as select 1; prepare transaction 'twophase_view'; -ERROR: PREPARE TRANSACTION is not yet supported in Cloudberry Database +ERROR: PREPARE TRANSACTION is not supported in utility mode rollback; -- PREPARE TRANACTION is not supported in GPDB begin; create sequence pg_temp.twophase_seq; prepare transaction 'twophase_sequence'; -ERROR: PREPARE TRANSACTION is not yet supported in Cloudberry Database +ERROR: PREPARE TRANSACTION is not supported in utility mode rollback; -- PREPARE TRANACTION is not supported in GPDB -- Temporary tables cannot be used with two-phase commit. 
create temp table twophase_tab (a int); @@ -369,22 +369,22 @@ select a from twophase_tab; (0 rows) prepare transaction 'twophase_tab'; -ERROR: PREPARE TRANSACTION is not yet supported in Cloudberry Database +ERROR: PREPARE TRANSACTION is not supported in utility mode rollback; -- PREPARE TRANACTION is not supported in GPDB begin; insert into twophase_tab values (1); prepare transaction 'twophase_tab'; -ERROR: PREPARE TRANSACTION is not yet supported in Cloudberry Database +ERROR: PREPARE TRANSACTION is not supported in utility mode rollback; -- PREPARE TRANACTION is not supported in GPDB begin; lock twophase_tab in access exclusive mode; prepare transaction 'twophase_tab'; -ERROR: PREPARE TRANSACTION is not yet supported in Cloudberry Database +ERROR: PREPARE TRANSACTION is not supported in utility mode rollback; -- PREPARE TRANACTION is not supported in GPDB begin; drop table twophase_tab; prepare transaction 'twophase_tab'; -ERROR: PREPARE TRANSACTION is not yet supported in Cloudberry Database +ERROR: PREPARE TRANSACTION is not supported in utility mode rollback; -- PREPARE TRANACTION is not supported in GPDB -- Corner case: current_schema may create a temporary schema if namespace -- creation is pending, so check after that. First reset the connection @@ -399,4 +399,4 @@ SELECT current_schema() ~ 'pg_temp' AS is_temp_schema; (1 row) PREPARE TRANSACTION 'twophase_search'; -ERROR: PREPARE TRANSACTION is not yet supported in Cloudberry Database +ERROR: PREPARE TRANSACTION is not supported in utility mode