Skip to content

Commit

Permalink
DE38 race condition when pool is exported and connection to target is…
Browse files Browse the repository at this point in the history
… in progress

The problem is that conn struct can be freed by zinfo destroy thread
(conn_fd == -1), while it is referenced from event loop thread. To avoid
this we force conns to be destroyed exclusively from event loop thread.

Signed-off-by: Jan Kryl <jan.kryl@cloudbyte.com>
  • Loading branch information
Jan Kryl committed Jun 26, 2018
1 parent d0963a0 commit 959ff7a
Showing 1 changed file with 36 additions and 31 deletions.
67 changes: 36 additions & 31 deletions cmd/zrepl/mgmt_conn.c
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,8 @@ close_conn(uzfs_mgmt_conn_t *conn)
{
async_task_t *async_task;

DBGCONN(conn, "Closing the connection");

/* Release resources tight to the conn */
if (conn->conn_buf != NULL) {
kmem_free(conn->conn_buf, conn->conn_bufsiz);
Expand Down Expand Up @@ -194,6 +196,26 @@ close_conn(uzfs_mgmt_conn_t *conn)
return (0);
}

/*
* Complete destruction of conn struct. conn list mtx must be held when calling
* this function.
* Close connection if still open, remove conn from list of conns and free it.
*/
static int
destroy_conn(uzfs_mgmt_conn_t *conn)
{
ASSERT(MUTEX_HELD(&conn_list_mtx));

if (conn->conn_fd >= 0) {
if (close_conn(conn) != 0)
return (-1);
}
DBGCONN(conn, "Destroying the connection");
SLIST_REMOVE(&uzfs_mgmt_conns, conn, uzfs_mgmt_conn, conn_next);
kmem_free(conn, sizeof (*conn));
return (0);
}

/*
* Create non-blocking socket and initiate connection to the target.
* Returns the new FD or -1.
Expand Down Expand Up @@ -232,12 +254,15 @@ connect_to_tgt(uzfs_mgmt_conn_t *conn)
static int
scan_conn_list(void)
{
uzfs_mgmt_conn_t *conn;
uzfs_mgmt_conn_t *conn, *conn_tmp;
struct epoll_event ev;
int rc = 0;

mutex_enter(&conn_list_mtx);
SLIST_FOREACH(conn, &uzfs_mgmt_conns, conn_next) {
/* iterate safely because entries can be destroyed while iterating */
conn = SLIST_FIRST(&uzfs_mgmt_conns);
while (conn != NULL) {
conn_tmp = SLIST_NEXT(conn, conn_next);
/* we need to create new connection */
if (conn->conn_refcount > 0 && conn->conn_fd < 0 &&
time(NULL) - conn->conn_last_connect >= RECONNECT_DELAY) {
Expand All @@ -256,13 +281,13 @@ scan_conn_list(void)
}
}
/* we need to close unused connection */
} else if (conn->conn_refcount == 0 && conn->conn_fd >= 0) {
DBGCONN(conn, "Closing the connection");
if (close_conn(conn) != 0) {
} else if (conn->conn_refcount == 0) {
if (destroy_conn(conn) != 0) {
rc = -1;
break;
}
}
conn = conn_tmp;
}
mutex_exit(&conn_list_mtx);

Expand Down Expand Up @@ -363,31 +388,10 @@ zinfo_destroy_cb(zvol_info_t *zinfo)
zinfo->mgmt_conn = NULL;

if (--conn->conn_refcount == 0) {
/* signal the event loop thread to close FD */
if (conn->conn_fd >= 0) {
ASSERT3P(mgmt_eventfd, >=, 0);
rc = write(mgmt_eventfd, &val, sizeof (val));
ASSERT3P(rc, ==, sizeof (val));
mutex_exit(&conn_list_mtx);
/* wait for event loop thread to close the FD */
/* TODO: too rough algorithm with sleep */
while (1) {
usleep(1000);
mutex_enter(&conn_list_mtx);
if (conn->conn_refcount > 0 ||
conn->conn_fd < 0)
break;
mutex_exit(&conn_list_mtx);
}
/* someone else reused the conn while waiting - ok */
if (conn->conn_refcount > 0) {
mutex_exit(&conn_list_mtx);
return;
}
}
SLIST_REMOVE(&uzfs_mgmt_conns, conn, uzfs_mgmt_conn,
conn_next);
kmem_free(conn, sizeof (*conn));
/* signal the event loop thread to close FD and destroy conn */
ASSERT3P(mgmt_eventfd, >=, 0);
rc = write(mgmt_eventfd, &val, sizeof (val));
ASSERT3P(rc, ==, sizeof (val));
}
mutex_exit(&conn_list_mtx);
}
Expand Down Expand Up @@ -1136,7 +1140,8 @@ uzfs_zvol_mgmt_thread(void *arg)
epollfd = -1;
mutex_enter(&conn_list_mtx);
SLIST_FOREACH(conn, &uzfs_mgmt_conns, conn_next) {
close_conn(conn);
if (conn->conn_fd >= 0)
close_conn(conn);
}
mutex_exit(&conn_list_mtx);
mutex_destroy(&conn_list_mtx);
Expand Down

0 comments on commit 959ff7a

Please sign in to comment.