PostgreSQL in practice: conditions under which reading the checkpoint record fails when recovery starts

  • 2020-05-06 11:56:36
  • OfStack

1. First, read the checkpoint record that ControlFile->checkPoint points to.

2. If that read fails, a standby aborts immediately (PANIC), whereas a primary retries with the checkpoint record that ControlFile->prevCheckPoint points to.


StartupXLOG->
 |--checkPointLoc = ControlFile->checkPoint;
 |--record = ReadCheckpointRecord(xlogreader, checkPointLoc, 1, true):
 |-- if (record != NULL){
   ...
  }else if (StandbyMode){
   ereport(PANIC,(errmsg("could not locate a valid checkpoint record")));
  }else{
   checkPointLoc = ControlFile->prevCheckPoint;
   record = ReadCheckpointRecord(xlogreader, checkPointLoc, 2, true);
   if (record != NULL){
    InRecovery = true;// Mark down enter recovery
   }else{
    ereport(PANIC,(errmsg("could not locate a valid checkpoint record")));
   }
  }

1. So when is the checkpoint record NULL (record == NULL)?

1, ControlFile->checkPoint % XLOG_BLCKSZ < SizeOfXLogShortPHD
2, ReadRecord(xlogreader, ControlFile->checkPoint, LOG, true) returns NULL
3, ReadRecord returns record != NULL && record->xl_rmid != RM_XLOG_ID
4, ReadRecord returns record != NULL && info != XLOG_CHECKPOINT_SHUTDOWN && info != XLOG_CHECKPOINT_ONLINE
5, ReadRecord returns record != NULL && record->xl_tot_len != SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint)

2. Conditions under which the ReadRecord function returns NULL

ReadRecord(xlogreader, ControlFile->checkPoint, LOG, true)
 |--record = XLogReadRecord(xlogreader, ControlFile->checkPoint, &errormsg);
 |-- 2.1 record==NULL &&  ! StandbyMode
 |-- 2.2 record!=NULL && !tliInHistory(xlogreader->latestPageTLI, expectedTLEs)
 /*-----
 note: whenever an xlog page is read, the timeline stored in its page header is saved in latestPageTLI
 XLogReaderValidatePageHeader
  -->xlogreader->latestPageTLI=hdr->xlp_tli;
 ------*/

3, Under what conditions does XLogReadRecord return NULL when reading the checkpoint record?

XLogReadRecord(xlogreader, ControlFile->checkPoint, &errormsg)
      targetPagePtr = ControlFile->checkPoint - (ControlFile->checkPoint % XLOG_BLCKSZ);
      targetRecOff = ControlFile->checkPoint % XLOG_BLCKSZ;
      readOff = ReadPageInternal(state, targetPagePtr, Min(targetRecOff + SizeOfXLogRecord, XLOG_BLCKSZ));
      pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) state->readBuf);
      record = (XLogRecord *) (state->readBuf + RecPtr % XLOG_BLCKSZ);
      total_len = record->xl_tot_len;
      -------------
      1, readOff < 0
      2, 0< targetRecOff < pageHeaderSize
      3, (((XLogPageHeader) state->readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD) && targetRecOff == pageHeaderSize
            the page starts with the continuation of a cross-page record, yet the checkpoint offset points exactly at the end of the page header
      4, targetRecOff <= XLOG_BLCKSZ - SizeOfXLogRecord &&
            !ValidXLogRecordHeader(state, ControlFile->checkPoint, state->ReadRecPtr, record, randAccess)
            -- (record->xl_tot_len < SizeOfXLogRecord || record->xl_rmid > RM_MAX_ID || record->xl_prev != state->ReadRecPtr)
      5, targetRecOff > XLOG_BLCKSZ - SizeOfXLogRecord && total_len < SizeOfXLogRecord
      6, total_len > state->readRecordBufSize && !allocate_recordbuf(state, total_len)
          if the record is damaged and total_len is very large, allocate_recordbuf has to enlarge the reader's record buffer, and that allocation may fail
          the record's checksum can only be verified after the full record has been read
      -------------

4, When does ReadPageInternal return readOff < 0?


ReadPageInternal(state,targetPagePtr, Min(targetRecOff + SizeOfXLogRecord, XLOG_BLCKSZ))

      1, when a WAL segment file is read for the first time, its first page is read first: readLen = state->read_page(...) and readLen < 0

      2, readLen > 0 && !XLogReaderValidatePageHeader(state, targetSegmentPtr, state->readBuf)
      --

      3, reading the page that contains the checkpoint: readLen = state->read_page(...) and readLen < 0

      4, readLen > 0 && readLen <= SizeOfXLogShortPHD

      5, !XLogReaderValidatePageHeader(state, pageptr, (char *) hdr)

When does state->read_page return a value < 0?


/*
 1, WaitForWALToBecomeAvailable fails to open the file
 2, lseek fails && !StandbyMode
 3, read fails && !StandbyMode
 4, page header validation fails && !StandbyMode
  In StandbyMode these failures trigger a retry instead: control goes back to WaitForWALToBecomeAvailable, which switches the log source and opens the file again
 */
 !WaitForWALToBecomeAvailable(targetPagePtr + reqLen,private->randAccess,1,targetRecPtr)//open
 |-- return -1
 readOff = targetPageOff;
 if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0){
   ! StandbyMode:: return -1
 }
 if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ){
   ! StandbyMode:: return -1
 }
 XLogReaderValidatePageHeader(xlogreader, targetPagePtr, readBuf)
  ! StandbyMode:: return -1

5. When will WaitForWALToBecomeAvailable return false?

--XLOG_FROM_ARCHIVE | XLOG_FROM_PG_WAL
      1, first, XLogFileReadAnyTLI tries to open the log file:
          1. Go through every timeline in the timeline history list, starting with the latest
          2. When reading the checkpoint, the source is XLOG_FROM_ANY
          3. Try to open the archived log first; if that fails, try to open the log in pg_wal
          4. If neither can be opened, move on to the previous timeline and try to open the file with the same segno on that timeline
          5. If the open succeeds, expectedTLEs is set to the complete timeline history list of the current timeline
      2, if the open fails, switch the log source: XLOG_FROM_ARCHIVE | XLOG_FROM_PG_WAL -> XLOG_FROM_STREAM
      3, when switching the log source away from XLOG_FROM_ARCHIVE | XLOG_FROM_PG_WAL:
          standby && promotion triggered: return false
          !StandbyMode: return false
      --XLOG_FROM_STREAM
      1, !WalRcvStreaming(), i.e. the WAL receiver process is dead: switch the log source
      2, CheckForStandbyTrigger() switch log source
      3, XLOG_FROM_STREAM-> XLOG_FROM_ARCHIVE

summary


Related articles: