From a260728984e27137130681ab24fa83a4f4815c4e Mon Sep 17 00:00:00 2001 From: Anton Shyrabokau <97127717+antons-antons@users.noreply.github.com> Date: Fri, 1 Apr 2022 12:44:28 -0700 Subject: [PATCH] Enable dumping corrupt WAL segments (#145) * Enable dumping corrupt WAL segments Add ability to dump WAL segment with corrupt page headers and records: skips over missing/broken page headers; skips over misformatted log records; allows dumping log records from a particular file starting from an optional offset (without the need for carefully crafted input) --- src/backend/access/transam/xlogreader.c | 117 ++++++++++---- src/bin/pg_waldump/pg_waldump.c | 194 ++++++++++++++++++++++-- src/include/access/xlogreader.h | 5 + 3 files changed, 276 insertions(+), 40 deletions(-) diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c index d797d9d5087..9f52c6fca18 100644 --- a/src/backend/access/transam/xlogreader.c +++ b/src/backend/access/transam/xlogreader.c @@ -239,7 +239,7 @@ WALOpenSegmentInit(WALOpenSegment *seg, WALSegmentContext *segcxt, void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr) { - Assert(!XLogRecPtrIsInvalid(RecPtr)); + Assert(!XLogRecPtrIsInvalid(RecPtr) || state->skip_lsn_checks); ResetDecoder(state); @@ -279,6 +279,14 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) bool gotheader; int readOff; +#define SKIP_INVALID_RECORD(rec_ptr) do { \ + rec_ptr = MAXALIGN(rec_ptr + 1); \ + if (rec_ptr % XLOG_BLCKSZ <= MAXALIGN(1)) \ + goto restart; \ + else \ + goto skip_invalid; \ + } while (0); + /* * randAccess indicates whether to verify the previous-record pointer of * the record we're reading. We only do this if we're reading @@ -315,7 +323,7 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) * In this case, EndRecPtr should already be pointing to a valid * record starting position. 
*/ - Assert(XRecOffIsValid(RecPtr)); + Assert(XRecOffIsValid(RecPtr) || state->skip_lsn_checks); randAccess = true; } @@ -351,17 +359,23 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) } else if (targetRecOff < pageHeaderSize) { - report_invalid_record(state, "invalid record offset at %X/%X", + if(!state->skip_page_validation) + { + report_invalid_record(state, "invalid record offset at %X/%X", LSN_FORMAT_ARGS(RecPtr)); - goto err; + goto err; + } } if ((((XLogPageHeader) state->readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD) && targetRecOff == pageHeaderSize) { - report_invalid_record(state, "contrecord is requested by %X/%X", + if(!state->skip_page_validation) + { + report_invalid_record(state, "contrecord is requested by %X/%X", LSN_FORMAT_ARGS(RecPtr)); - goto err; + goto err; + } } /* ReadPageInternal has verified the page header */ @@ -376,6 +390,7 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) * cannot access any other fields until we've verified that we got the * whole header. 
*/ +skip_invalid: record = (XLogRecord *) (state->readBuf + RecPtr % XLOG_BLCKSZ); total_len = record->xl_tot_len; @@ -391,7 +406,13 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) { if (!ValidXLogRecordHeader(state, RecPtr, state->ReadRecPtr, record, randAccess)) - goto err; + { + if(!state->skip_invalid_records) + goto err; + + SKIP_INVALID_RECORD(RecPtr); + } + gotheader = true; } else @@ -399,12 +420,19 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) /* XXX: more validation should be done here */ if (total_len < SizeOfXLogRecord) { - report_invalid_record(state, - "invalid record length at %X/%X: wanted %u, got %u", - LSN_FORMAT_ARGS(RecPtr), - (uint32) SizeOfXLogRecord, total_len); - goto err; + if(!state->skip_invalid_records) + { + report_invalid_record(state, + "invalid record length at %X/%X: wanted %u, got %u", + LSN_FORMAT_ARGS(RecPtr), + (uint32) SizeOfXLogRecord, total_len); + + goto err; + } + + SKIP_INVALID_RECORD(RecPtr); } + gotheader = false; } @@ -425,10 +453,16 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) if (total_len > state->readRecordBufSize && !allocate_recordbuf(state, total_len)) { - /* We treat this as a "bogus data" condition */ - report_invalid_record(state, "record length %u at %X/%X too long", - total_len, LSN_FORMAT_ARGS(RecPtr)); - goto err; + + if(!state->skip_invalid_records) + { + /* We treat this as a "bogus data" condition */ + report_invalid_record(state, "record length %u at %X/%X too long", + total_len, LSN_FORMAT_ARGS(RecPtr)); + goto err; + } + + SKIP_INVALID_RECORD(RecPtr); } /* Copy the first fragment of the record from the first page. 
*/ @@ -473,10 +507,15 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) /* Check that the continuation on next page looks valid */ if (!(pageHeader->xlp_info & XLP_FIRST_IS_CONTRECORD)) { - report_invalid_record(state, + if(!state->skip_invalid_records) + { + report_invalid_record(state, "there is no contrecord flag at %X/%X", LSN_FORMAT_ARGS(RecPtr)); - goto err; + goto err; + } + + SKIP_INVALID_RECORD(RecPtr); } /* @@ -486,12 +525,17 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) if (pageHeader->xlp_rem_len == 0 || total_len != (pageHeader->xlp_rem_len + gotlen)) { - report_invalid_record(state, + if(!state->skip_invalid_records) + { + report_invalid_record(state, "invalid contrecord length %u (expected %lld) at %X/%X", pageHeader->xlp_rem_len, ((long long) total_len) - gotlen, LSN_FORMAT_ARGS(RecPtr)); - goto err; + goto err; + } + + SKIP_INVALID_RECORD(RecPtr); } /* Append the continuation from this page to the buffer */ @@ -522,7 +566,13 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) record = (XLogRecord *) state->readRecordBuf; if (!ValidXLogRecordHeader(state, RecPtr, state->ReadRecPtr, record, randAccess)) - goto err; + { + if(!state->skip_invalid_records) + goto err; + + SKIP_INVALID_RECORD(RecPtr); + } + gotheader = true; } } while (gotlen < total_len); @@ -531,7 +581,12 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) record = (XLogRecord *) state->readRecordBuf; if (!ValidXLogRecord(state, record, RecPtr)) - goto err; + { + if(!state->skip_invalid_records) + goto err; + + SKIP_INVALID_RECORD(RecPtr); + } pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) state->readBuf); state->ReadRecPtr = RecPtr; @@ -548,7 +603,12 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) /* Record does not cross a page boundary */ if (!ValidXLogRecord(state, record, RecPtr)) - goto err; + { + if(!state->skip_invalid_records) + goto err; + + SKIP_INVALID_RECORD(RecPtr); + } state->EndRecPtr = RecPtr + MAXALIGN(total_len); 
@@ -652,8 +712,7 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen) /* we can be sure to have enough WAL available, we scrolled back */ Assert(readLen == XLOG_BLCKSZ); - if (!XLogReaderValidatePageHeader(state, targetSegmentPtr, - state->readBuf)) + if (!XLogReaderValidatePageHeader(state, targetSegmentPtr, state->readBuf) && !state->skip_page_validation) goto err; } @@ -690,7 +749,7 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen) /* * Now that we know we have the full header, validate it. */ - if (!XLogReaderValidatePageHeader(state, pageptr, (char *) hdr)) + if (!XLogReaderValidatePageHeader(state, pageptr, (char *) hdr) && !state->skip_page_validation) goto err; /* update read state information */ @@ -748,7 +807,7 @@ ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr, * We can't exactly verify the prev-link, but surely it should be less * than the record's own address. */ - if (!(record->xl_prev < RecPtr)) + if (!(record->xl_prev < RecPtr) && !state->skip_lsn_checks) { report_invalid_record(state, "record with incorrect prev-link %X/%X at %X/%X", @@ -764,7 +823,7 @@ ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr, * check guards against torn WAL pages where a stale but valid-looking * WAL record starts on a sector boundary. */ - if (record->xl_prev != PrevRecPtr) + if (record->xl_prev != PrevRecPtr && !state->skip_lsn_checks) { report_invalid_record(state, "record with incorrect prev-link %X/%X at %X/%X", @@ -907,7 +966,7 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr, * check typically fails when an old WAL segment is recycled, and hasn't * yet been overwritten with new data yet. 
*/ - if (hdr->xlp_pageaddr != recaddr) + if (hdr->xlp_pageaddr != recaddr && !state->skip_lsn_checks) { char fname[MAXFNAMELEN]; diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c index f8b8afe4a7b..786da4be3ab 100644 --- a/src/bin/pg_waldump/pg_waldump.c +++ b/src/bin/pg_waldump/pg_waldump.c @@ -13,9 +13,11 @@ #include "postgres.h" #include +#include #include #include + #include "access/transam.h" #include "access/xlog_internal.h" #include "access/xlogreader.h" @@ -23,8 +25,11 @@ #include "common/fe_memutils.h" #include "common/logging.h" #include "getopt_long.h" +#include "port/pg_bitutils.h" #include "rmgrdesc.h" +#define OFFSET_INVALID ((size_t)-1) + static const char *progname; static int WalSegSz; @@ -35,6 +40,7 @@ typedef struct XLogDumpPrivate XLogRecPtr startptr; XLogRecPtr endptr; bool endptr_reached; + char* input_filename; } XLogDumpPrivate; typedef struct XLogDumpConfig @@ -52,6 +58,7 @@ typedef struct XLogDumpConfig int filter_by_rmgr; TransactionId filter_by_xid; bool filter_by_xid_enabled; + bool ignore_format_errors; } XLogDumpConfig; typedef struct Stats @@ -70,8 +77,36 @@ typedef struct XLogDumpStats Stats record_stats[RM_NEXT_ID][MAX_XLINFO_TYPES]; } XLogDumpStats; + #define fatal_error(...) 
do { pg_log_fatal(__VA_ARGS__); exit(EXIT_FAILURE); } while(0) +/* calculate ceil(log base 2) of num */ +static int +my_log2(long num) +{ + /* + * guard against too-large input, which would be invalid for + * pg_ceil_log2_*() + */ + if (num > LONG_MAX / 2) + num = LONG_MAX / 2; + +#if SIZEOF_LONG < 8 + return pg_ceil_log2_32(num); +#else + return pg_ceil_log2_64(num); +#endif +} + +/* calculate first power of 2 >= num, bounded to what will fit in an int */ +static int +next_pow2_int(long num) +{ + if (num > INT_MAX / 2) + num = INT_MAX / 2; + return 1 << my_log2(num); +} + static void print_rmgr_list(void) { @@ -287,6 +322,18 @@ WALDumpOpenSegment(XLogReaderState *state, XLogSegNo nextSegNo, TimeLineID tli = *tli_p; char fname[MAXPGPATH]; int tries; + XLogDumpPrivate *private = state->private_data; + + if(private->input_filename) + { + Assert(nextSegNo == 0); + + state->seg.ws_file = open_file_in_directory(state->segcxt.ws_dir, private->input_filename); + if (state->seg.ws_file >= 0) + return; + + fatal_error("could not open file \"%s\": %m", private->input_filename); + } XLogFileName(fname, tli, nextSegNo, state->segcxt.ws_segsize); @@ -357,6 +404,7 @@ WALDumpReadPage(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen, { WALOpenSegment *seg = &errinfo.wre_seg; char fname[MAXPGPATH]; + char *actual_fname = private->input_filename ? 
private->input_filename : fname; XLogFileName(fname, seg->ws_tli, seg->ws_segno, state->segcxt.ws_segsize); @@ -365,11 +413,11 @@ WALDumpReadPage(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen, { errno = errinfo.wre_errno; fatal_error("could not read from file %s, offset %u: %m", - fname, errinfo.wre_off); + actual_fname, errinfo.wre_off); } else fatal_error("could not read from file %s, offset %u: read %d of %zu", - fname, errinfo.wre_off, errinfo.wre_read, + actual_fname, errinfo.wre_off, errinfo.wre_read, (Size) errinfo.wre_req); } @@ -468,16 +516,25 @@ XLogDumpDisplayRecord(XLogDumpConfig *config, XLogReaderState *record) int block_id; uint8 info = XLogRecGetInfo(record); XLogRecPtr xl_prev = XLogRecGetPrev(record); + XLogDumpPrivate *private = record->private_data; StringInfoData s; XLogDumpRecordLen(record, &rec_len, &fpi_len); - printf("rmgr: %-11s len (rec/tot): %6u/%6u, tx: %10u, lsn: %X/%08X, prev %X/%08X, ", + if(private->input_filename) + printf("rmgr: %-11s len (rec/tot): %6u/%6u, tx: %10u, offset: 0x%lX, prev %X/%08X, ", desc->rm_name, rec_len, XLogRecGetTotalLen(record), XLogRecGetXid(record), - LSN_FORMAT_ARGS(record->ReadRecPtr), + record->ReadRecPtr, LSN_FORMAT_ARGS(xl_prev)); + else + printf("rmgr: %-11s len (rec/tot): %6u/%6u, tx: %10u, lsn: %X/%08X, prev %X/%08X, ", + desc->rm_name, + rec_len, XLogRecGetTotalLen(record), + XLogRecGetXid(record), + LSN_FORMAT_ARGS(record->ReadRecPtr), + LSN_FORMAT_ARGS(xl_prev)); id = desc->rm_identify(info); if (id == NULL) @@ -729,7 +786,10 @@ usage(void) printf(_(" -b, --bkp-details output detailed information about backup blocks\n")); printf(_(" -e, --end=RECPTR stop reading at WAL location RECPTR\n")); printf(_(" -f, --follow keep retrying after reaching end of WAL\n")); + printf(_(" -F, --file=FNAME dump log records from a single file\n")); + printf(_(" -i, --ignore ignore format errors, skip invalid structures\n")); printf(_(" -n, --limit=N number of records to display\n")); + printf(_(" -o, 
--offset=OFFSET offset of the first record to in a file to dump\n")); printf(_(" -p, --path=PATH directory in which to find log segment files or a\n" " directory with a ./pg_wal that contains such files\n" " (default: current directory, ./pg_wal, $PGDATA/pg_wal)\n")); @@ -760,14 +820,20 @@ main(int argc, char **argv) XLogRecord *record; XLogRecPtr first_record; char *waldir = NULL; + char *fname = NULL; char *errormsg; + bool single_file = false; + size_t start_offset = OFFSET_INVALID; static struct option long_options[] = { {"bkp-details", no_argument, NULL, 'b'}, {"end", required_argument, NULL, 'e'}, {"follow", no_argument, NULL, 'f'}, + {"file", required_argument, NULL, 'F'}, {"help", no_argument, NULL, '?'}, + {"ignore", no_argument, NULL, 'i'}, {"limit", required_argument, NULL, 'n'}, + {"offset", required_argument, NULL, 'o'}, {"path", required_argument, NULL, 'p'}, {"quiet", no_argument, NULL, 'q'}, {"rmgr", required_argument, NULL, 'r'}, @@ -808,6 +874,7 @@ main(int argc, char **argv) private.startptr = InvalidXLogRecPtr; private.endptr = InvalidXLogRecPtr; private.endptr_reached = false; + private.input_filename = NULL; config.quiet = false; config.bkp_details = false; @@ -819,6 +886,7 @@ main(int argc, char **argv) config.filter_by_xid_enabled = false; config.stats = false; config.stats_per_record = false; + config.ignore_format_errors = false; if (argc <= 1) { @@ -826,7 +894,7 @@ main(int argc, char **argv) goto bad_argument; } - while ((option = getopt_long(argc, argv, "be:fn:p:qr:s:t:x:z", + while ((option = getopt_long(argc, argv, "be:fF:in:o:p:qr:s:t:x:z", long_options, &optindex)) != -1) { switch (option) @@ -846,6 +914,13 @@ main(int argc, char **argv) case 'f': config.follow = true; break; + case 'F': + fname = pg_strdup(optarg); + single_file = true; + break; + case 'i': + config.ignore_format_errors = true; + break; case 'n': if (sscanf(optarg, "%d", &config.stop_after_records) != 1) { @@ -853,6 +928,13 @@ main(int argc, char **argv) goto 
bad_argument; } break; + case 'o': + if (sscanf(optarg, "%zu", &start_offset) != 1) + { + pg_log_error("could not parse offset \"%s\"", optarg); + goto bad_argument; + } + break; case 'p': waldir = pg_strdup(optarg); break; @@ -939,6 +1021,73 @@ main(int argc, char **argv) goto bad_argument; } + if (start_offset != OFFSET_INVALID) + { + if(!XLogRecPtrIsInvalid(private.startptr) || !XLogRecPtrIsInvalid(private.endptr)) + { + pg_log_error("either file offset or start/end pointers should be specified"); + goto bad_argument; + } + + if(!single_file) + { + pg_log_error("offset option could only be used with filename option"); + goto bad_argument; + } + + /* Log records are maxaligned, start at the closest next position */ + private.startptr = MAXALIGN(start_offset); + } + + if(single_file) + { + char *directory = NULL; + int fd; + struct stat stat; + + if(config.follow) + { + pg_log_error("Follow could not be used in file dump mode"); + goto bad_argument; + } + + if (waldir != NULL) + { + pg_log_error("either single file or wal directory should be specified"); + goto bad_argument; + } + + split_path(fname, &directory, &private.input_filename); + waldir = directory; + + if(waldir == NULL) + { + char *cwd = malloc(MAXPGPATH); + + if (!getcwd(cwd, MAXPGPATH)) + fatal_error("could identify current directory: %m"); + + waldir = cwd; + } + + if (!verify_directory(waldir)) + fatal_error("could not open directory \"%s\": %m", waldir); + + fd = open_file_in_directory(waldir, private.input_filename); + if (fd < 0) + fatal_error("could not open file \"%s\"", private.input_filename); + + if(fstat(fd, &stat) != 0) + fatal_error("could not stat file \"%s\"", private.input_filename); + + private.endptr = stat.st_size; + + /* Round up segment size to next power of 2 or 1MB */ + WalSegSz = Max(next_pow2_int(private.endptr), 1024 * 1024); + + close(fd); + } + if (waldir != NULL) { /* validate path points to directory */ @@ -957,6 +1106,12 @@ main(int argc, char **argv) int fd; XLogSegNo 
segno; + if(single_file) + { + pg_log_error("either single file or start/end boundaries should be specified"); + goto bad_argument; + } + split_path(argv[optind], &directory, &fname); if (waldir == NULL && directory != NULL) @@ -1029,10 +1184,11 @@ main(int argc, char **argv) } } else - waldir = identify_target_directory(waldir, NULL); + if (!single_file) + waldir = identify_target_directory(waldir, NULL); /* we don't know what to print */ - if (XLogRecPtrIsInvalid(private.startptr)) + if (XLogRecPtrIsInvalid(private.startptr) && !single_file) { pg_log_error("no start WAL location given"); goto bad_argument; @@ -1050,12 +1206,28 @@ main(int argc, char **argv) if (!xlogreader_state) fatal_error("out of memory"); - /* first find a valid recptr to start from */ - first_record = XLogFindNextRecord(xlogreader_state, private.startptr); + if(single_file) + { + if(config.ignore_format_errors) + { + xlogreader_state->skip_page_validation = true; + xlogreader_state->skip_invalid_records = true; + } + + xlogreader_state->skip_lsn_checks = true; - if (first_record == InvalidXLogRecPtr) - fatal_error("could not find a valid record after %X/%X", + first_record = private.startptr; + XLogBeginRead(xlogreader_state, first_record); + } + else + { + /* first find a valid recptr to start from */ + first_record = XLogFindNextRecord(xlogreader_state, private.startptr); + + if (first_record == InvalidXLogRecPtr) + fatal_error("could not find a valid record after %X/%X", LSN_FORMAT_ARGS(private.startptr)); + } /* * Display a message that we're skipping data if `from` wasn't a pointer diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h index 10458c23eda..c7fac7bdace 100644 --- a/src/include/access/xlogreader.h +++ b/src/include/access/xlogreader.h @@ -262,6 +262,11 @@ struct XLogReaderState XLogRecPtr missingContrecPtr; /* Set when XLP_FIRST_IS_OVERWRITE_CONTRECORD is found */ XLogRecPtr overwrittenRecPtr; + + /* Disable validation to allow dumping corrupt WAL */ 
+ bool skip_page_validation; + bool skip_invalid_records; + bool skip_lsn_checks; }; /* Get a new XLogReader */