[DE94]fix(zfs):zrepl crash during data conn break (openzfs#109)
When the data connection breaks, the volume's metavolblocksize is set to 0.
If any IOs are pending at that time, executing them uses this zero value
and causes a floating point exception (integer division by zero).

The fix is to NOT reset metavolblocksize to 0 on connection break, as this is
not required.

Signed-off-by: Vishnu Itta <vitta@mayadata.io>
vishnuitta authored Sep 12, 2018
1 parent 11fb030 commit cc8484d
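For context, here is a minimal sketch of the failure mode and of the guard this
commit relies on. It is a hypothetical stand-in, not the real uzfs code: the
struct and helper names below are invented, and it assumes the metadata length
is derived by integer division with zv_metavolblocksize, so a value of 0 raises
SIGFPE on a pending write.

	#include <errno.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Hypothetical stand-in for the real zvol_state_t. */
	typedef struct {
		uint64_t zv_metavolblocksize;
	} zv_sketch_t;

	/*
	 * Assumed shape of the crash path: metadata length is computed by
	 * integer division with zv_metavolblocksize, so a 0 value raises
	 * SIGFPE. The EINVAL guard mirrors the check in uzfs_write_data().
	 */
	static int
	write_data_sketch(zv_sketch_t *zv, uint64_t len, uint64_t *mlen)
	{
		if (zv->zv_metavolblocksize == 0)
			return (EINVAL);
		*mlen = len / zv->zv_metavolblocksize;
		return (0);
	}

	int
	main(void)
	{
		zv_sketch_t zv = { .zv_metavolblocksize = 0 };
		uint64_t mlen;

		if (write_data_sketch(&zv, 4096, &mlen) == EINVAL)
			printf("IO on unset granularity rejected, not crashed\n");
		return (0);
	}

With this commit, reinitialize_zv_state() no longer resets zv_metavolblocksize
to 0 when the data connection breaks, so pending IOs never observe the zero
value; the EINVAL check in uzfs_write_data() covers only fresh zvols whose
granularity was never set.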
Showing 8 changed files with 115 additions and 39 deletions.
1 change: 1 addition & 0 deletions include/gtest_utils.h
@@ -19,6 +19,7 @@

namespace GtestUtils {

void graceful_close(int sockfd);
std::string execCmd(std::string const &zfsCmd, std::string const &args);
std::string getCmdPath(std::string zfsCmd);
int verify_buf(void *buf, int len, const char *pattern);
1 change: 1 addition & 0 deletions include/zrepl_mgmt.h
@@ -71,6 +71,7 @@ struct zvol_io_cmd_s;
#if DEBUG
typedef struct inject_delay_s {
int helping_replica_rebuild_step;
int pre_uzfs_write_data;
} inject_delay_t;

typedef struct inject_error_s {
18 changes: 18 additions & 0 deletions lib/libzpool/uzfs_io.c
@@ -27,6 +27,10 @@
#include <uzfs_io.h>
#include <zrepl_mgmt.h>

#if DEBUG
inject_error_t inject_error;
#endif

#define GET_NEXT_CHUNK(chunk_io, offset, len, end) \
do { \
uzfs_io_chunk_list_t *node; \
@@ -82,6 +86,7 @@ uzfs_write_data(zvol_state_t *zv, char *buf, uint64_t offset, uint64_t len,
* If trying IO on fresh zvol before metadata granularity is set return
* error.
*/

if (zv->zv_metavolblocksize == 0)
return (EINVAL);
ASSERT3P(zv->zv_metavolblocksize, !=, 0);
@@ -96,6 +101,12 @@
sync = (dmu_objset_syncprop(os) == ZFS_SYNC_ALWAYS) ? 1 : 0;
ASSERT3P(zv->zv_volmetablocksize, !=, 0);

#if DEBUG
if (inject_error.delay.pre_uzfs_write_data == 1) {
LOG_DEBUG("delaying write");
sleep(10);
}
#endif
if (metadata != NULL) {
mlen = get_metadata_len(zv, offset, len);
VERIFY((mlen % metadatasize) == 0);
@@ -479,6 +490,13 @@ uzfs_update_metadata_granularity(zvol_state_t *zv, uint64_t tgt_block_size)
if (tgt_block_size == zv->zv_metavolblocksize)
return (0); /* nothing to update */

if ((zv->zv_metavolblocksize != 0) &&
(tgt_block_size != zv->zv_metavolblocksize)) {
LOG_ERR("Update metadata granularity from old %lu to new %lu "
"failed", zv->zv_metavolblocksize, tgt_block_size);
return (-1);
}

tx = dmu_tx_create(zv->zv_objset);
dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
error = dmu_tx_assign(tx, TXG_WAIT);
13 changes: 9 additions & 4 deletions lib/libzpool/zrepl_mgmt.c
@@ -301,21 +301,23 @@ uzfs_zinfo_destroy(const char *name, spa_t *spa)
zvol_info_t *zt = NULL;
int namelen = ((name) ? strlen(name) : 0);
zvol_state_t *zv;
int destroyed = 0;

mutex_enter(&zvol_list_mutex);

/* clear out all zvols for this spa_t */
if (name == NULL) {
SLIST_FOREACH_SAFE(zinfo, &zvol_list, zinfo_next, zt) {
if (strncmp(spa_name(spa),
zinfo->name, strlen(spa_name(spa))) == 0) {
if (strcmp(spa_name(spa),
spa_name(zinfo->zv->zv_spa)) == 0) {
SLIST_REMOVE(&zvol_list, zinfo, zvol_info_s,
zinfo_next);

mutex_exit(&zvol_list_mutex);
zv = zinfo->zv;
uzfs_mark_offline_and_free_zinfo(zinfo);
uzfs_close_dataset(zv);
destroyed++;
mutex_enter(&zvol_list_mutex);
}
}
@@ -332,12 +334,15 @@ uzfs_zinfo_destroy(const char *name, spa_t *spa)
zv = zinfo->zv;
uzfs_mark_offline_and_free_zinfo(zinfo);
uzfs_close_dataset(zv);
mutex_enter(&zvol_list_mutex);
break;
destroyed++;
goto end;
}
}
}
mutex_exit(&zvol_list_mutex);
end:
LOG_INFO("Destroy for pool: %s vol: %s, destroyed: %d", (spa == NULL) ?
"null" : spa_name(spa), (name == NULL) ? "null" : name, destroyed);
return (0);
}

17 changes: 14 additions & 3 deletions lib/libzrepl/data_conn.c
@@ -276,6 +276,14 @@ uzfs_zvol_worker(void *arg)
rebuild_cmd_req = hdr->flags & ZVOL_OP_FLAG_REBUILD;
read_metadata = hdr->flags & ZVOL_OP_FLAG_READ_METADATA;

if (zinfo->is_io_ack_sender_created == B_FALSE) {
if (!(rebuild_cmd_req && (hdr->opcode == ZVOL_OPCODE_WRITE)))
zio_cmd_free(&zio_cmd);
if (hdr->opcode == ZVOL_OPCODE_WRITE)
atomic_inc_64(&zinfo->write_req_received_cnt);
goto drop_refcount;
}

/*
* For rebuild case, do not free zio_cmd
*/
@@ -1163,7 +1171,6 @@ reinitialize_zv_state(zvol_state_t *zv)
{
if (zv == NULL)
return;
zv->zv_metavolblocksize = 0;

uzfs_zvol_set_status(zv, ZVOL_STATUS_DEGRADED);
uzfs_zvol_set_rebuild_status(zv, ZVOL_REBUILDING_INIT);
@@ -1347,11 +1354,14 @@ uzfs_zvol_io_ack_sender(void *arg)
LOG_INFO("Data connection for zvol %s closed on fd: %d",
zinfo->name, fd);

(void) pthread_mutex_lock(&zinfo->zinfo_mutex);
zinfo->is_io_ack_sender_created = B_FALSE;
(void) pthread_mutex_unlock(&zinfo->zinfo_mutex);

remove_pending_cmds_to_ack(fd, zinfo);

(void) pthread_mutex_lock(&zinfo->zinfo_mutex);
zinfo->conn_closed = B_FALSE;
zinfo->is_io_ack_sender_created = B_FALSE;
(void) pthread_mutex_unlock(&zinfo->zinfo_mutex);

uzfs_zinfo_drop_refcnt(zinfo);
@@ -1605,7 +1615,7 @@ uzfs_zvol_io_receiver(void *arg)
* wait for ack thread to exit to avoid races with new
* connections for the same zinfo
*/
while (zinfo->conn_closed && zinfo->is_io_ack_sender_created) {
while (zinfo->conn_closed || zinfo->is_io_ack_sender_created) {
(void) pthread_mutex_unlock(&zinfo->zinfo_mutex);
usleep(1000);
(void) pthread_mutex_lock(&zinfo->zinfo_mutex);
@@ -1616,6 +1626,7 @@

zinfo->io_ack_waiting = 0;

taskq_wait(zinfo->uzfs_zvol_taskq);
reinitialize_zv_state(zinfo->zv);
zinfo->is_io_receiver_created = B_FALSE;
uzfs_zinfo_drop_refcnt(zinfo);
19 changes: 19 additions & 0 deletions tests/cbtest/gtest/gtest_utils.cc
@@ -16,6 +16,25 @@
#define POOL_SIZE (100 * 1024 * 1024)
#define ZVOL_SIZE (10 * 1024 * 1024)

/*
* We have to wait for the other end to close the connection, because the
* next test case could initiate a new connection before this one is
* fully closed and cause a handshake error. Or it could result in EBUSY
* error when destroying zpool if it is not released in time by zrepl.
*/
void GtestUtils::graceful_close(int sockfd)
{
int rc;
char val;

if (sockfd < 0)
return;
shutdown(sockfd, SHUT_WR);
rc = read(sockfd, &val, sizeof (val));
ASSERT_EQ(rc, 0);
close(sockfd);
}

void GtestUtils::init_buf(void *buf, int len, const char *pattern) {
int i;
char c;
66 changes: 53 additions & 13 deletions tests/cbtest/gtest/test_uzfs.cc
@@ -202,6 +202,8 @@ uzfs_mock_rebuild_scanner(void *arg)

if (rebuild_test_case == 6) {
close(data_conn_fd);
while (zinfo->is_io_receiver_created == B_TRUE)
sleep(2);
sleep(5);
}

@@ -236,8 +238,10 @@
exit:
shutdown(fd, SHUT_RDWR);
exit1:
rebuild_test_case = 0;
close(fd);

rebuild_test_case = 0;

zk_thread_exit();
}

@@ -950,6 +954,7 @@ uzfs_mock_zvol_rebuild_dw_replica(void *arg)
#if DEBUG
inject_error.delay.helping_replica_rebuild_step = 0;
#endif
LOG_INFO("Resetting delay to zero");
}

rc = uzfs_zvol_socket_read(sfd, (char *)&hdr, sizeof (hdr));
@@ -1044,7 +1049,8 @@
}

void execute_rebuild_test_case(const char *s, int test_case,
zvol_rebuild_status_t status, zvol_rebuild_status_t verify_status)
zvol_rebuild_status_t status, zvol_rebuild_status_t verify_status,
int verify_refcnt = 2)
{
kthread_t *thrd;
rebuild_thread_arg_t *rebuild_args;
@@ -1069,7 +1075,7 @@ void execute_rebuild_test_case(const char *s, int test_case,
break;
}

EXPECT_EQ(2, zinfo->refcnt);
EXPECT_EQ(verify_refcnt, zinfo->refcnt);

EXPECT_EQ(verify_status, uzfs_zvol_get_rebuild_status(zinfo->zv));
}
@@ -1171,19 +1177,23 @@ static void do_data_connection(int &data_fd, std::string host, uint16_t port,
TEST(uZFS, TestRebuildCompleteWithDataConn) {
io_receiver = &uzfs_zvol_io_receiver;

uzfs_update_metadata_granularity(zv, 0);
uzfs_zvol_set_rebuild_status(zv, ZVOL_REBUILDING_INIT);
do_data_connection(data_conn_fd, "127.0.0.1", 3232, "vol1");
do_data_connection(data_conn_fd, "127.0.0.1", IO_SERVER_PORT, "vol1");
/* thread helping rebuild will exit after writing valid write IO and REBUILD_STEP_DONE, and reads REBUILD_STEP, writes REBUILD_STEP_DONE */
execute_rebuild_test_case("complete rebuild with data conn", 6, ZVOL_REBUILDING_IN_PROGRESS, ZVOL_REBUILDING_INIT);
}

TEST(uZFS, TestRebuildComplete) {
uzfs_update_metadata_granularity(zv, 512);
uzfs_zvol_set_rebuild_status(zv, ZVOL_REBUILDING_INIT);
do_data_connection(data_conn_fd, "127.0.0.1", IO_SERVER_PORT, "vol1");
/* thread helping rebuild will exit after writing valid write IO and REBUILD_STEP_DONE, and reads REBUILD_STEP, writes REBUILD_STEP_DONE */
execute_rebuild_test_case("complete rebuild", 7, ZVOL_REBUILDING_IN_PROGRESS, ZVOL_REBUILDING_DONE);
execute_rebuild_test_case("complete rebuild", 7, ZVOL_REBUILDING_IN_PROGRESS, ZVOL_REBUILDING_DONE, 4);
EXPECT_EQ(ZVOL_STATUS_HEALTHY, uzfs_zvol_get_status(zinfo->zv));

close(data_conn_fd);
while (zinfo->is_io_receiver_created == B_TRUE)
sleep(2);
sleep(5);
memset(&zinfo->zv->rebuild_info, 0, sizeof (zvol_rebuild_info_t));
}

@@ -1243,31 +1253,61 @@ TEST(RebuildScanner, AckSenderCreatedFalse) {
zinfo2->is_io_ack_sender_created = B_TRUE;
execute_rebuild_test_case("Ack Sender Created False", 8,
ZVOL_REBUILDING_IN_PROGRESS, ZVOL_REBUILDING_FAILED);
zinfo2->is_io_ack_sender_created = B_FALSE;
zinfo2->is_io_ack_sender_created = B_TRUE;
}

TEST(RebuildScanner, ShutdownRebuildFd) {
/* Set io_ack_sender_created as B_FALSE */
uzfs_update_metadata_granularity(zv2, 0);
zinfo2->is_io_ack_sender_created = B_FALSE;
uzfs_zvol_set_rebuild_status(zv2, ZVOL_REBUILDING_INIT);
do_data_connection(data_conn_fd, "127.0.0.1", 3232, "vol3");
do_data_connection(data_conn_fd, "127.0.0.1", IO_SERVER_PORT, "vol3");
execute_rebuild_test_case("Shutdown Rebuild FD", 9,
ZVOL_REBUILDING_IN_PROGRESS, ZVOL_REBUILDING_FAILED);
}

TEST(RebuildScanner, RebuildSuccess) {
uzfs_update_metadata_granularity(zv2, 0);
uzfs_zvol_set_rebuild_status(zv2, ZVOL_REBUILDING_INIT);
do_data_connection(data_conn_fd, "127.0.0.1", 3232, "vol3");
do_data_connection(data_conn_fd, "127.0.0.1", IO_SERVER_PORT, "vol3");
zvol_rebuild_step_size = (1024ULL * 1024ULL * 100);

/* Rebuild thread sendinc complete opcode */
/* Rebuild thread sending complete opcode */
execute_rebuild_test_case("complete rebuild", 10,
ZVOL_REBUILDING_IN_PROGRESS, ZVOL_REBUILDING_DONE);
EXPECT_EQ(ZVOL_STATUS_HEALTHY, uzfs_zvol_get_status(zinfo->zv));
memset(&zinfo->zv->rebuild_info, 0, sizeof (zvol_rebuild_info_t));
}

TEST(Misc, DelayWriteAndBreakConn) {
struct zvol_io_rw_hdr *io_hdr;
zvol_io_cmd_t *zio_cmd;

#if DEBUG
inject_error.delay.pre_uzfs_write_data = 1;
#endif
zvol_io_hdr_t hdr;
bzero(&hdr, sizeof (hdr));
hdr.status = ZVOL_OP_STATUS_OK;
hdr.version = REPLICA_VERSION;
hdr.opcode = ZVOL_OPCODE_WRITE;
hdr.len = 512 + sizeof(struct zvol_io_rw_hdr);
zio_cmd = zio_cmd_alloc(&hdr, -1);

io_hdr = (struct zvol_io_rw_hdr *)zio_cmd->buf;
io_hdr->io_num = 100;
io_hdr->len = 512;

uzfs_zinfo_take_refcnt(zinfo2);
zio_cmd->zv = zinfo2;

taskq_dispatch(zinfo2->uzfs_zvol_taskq, uzfs_zvol_worker, zio_cmd, TQ_SLEEP);
sleep(5);
GtestUtils::graceful_close(data_conn_fd);
sleep(10);
#if DEBUG
inject_error.delay.pre_uzfs_write_data = 0;
#endif
}

/* Volume name stored in zinfo is "pool1/vol1" */
TEST(VolumeNameCompare, VolumeNameCompareTest) {

19 changes: 0 additions & 19 deletions tests/cbtest/gtest/test_zrepl_prot.cc
@@ -406,25 +406,6 @@ static void transition_zvol_to_online(int &ioseq, int control_fd,
EXPECT_EQ(hdr_in.len, 0);
}

/*
* We have to wait for the other end to close the connection, because the
* next test case could initiate a new connection before this one is
* fully closed and cause a handshake error. Or it could result in EBUSY
* error when destroying zpool if it is not released in time by zrepl.
*/
static void graceful_close(int sockfd)
{
int rc;
char val;

if (sockfd < 0)
return;
shutdown(sockfd, SHUT_WR);
rc = read(sockfd, &val, sizeof (val));
ASSERT_EQ(rc, 0);
close(sockfd);
}

static std::string getPoolState(std::string pname)
{
return (execCmd("zpool", std::string("list -Ho health ") + pname));
