8 /* ---------------------------------- MASTER -------------------------------- */
/* Propagate a command (argv/argc) to every slave in the 'slaves' list.
 *
 * Slaves whose replstate is REDIS_REPL_WAIT_BGSAVE_START are skipped.
 * When the DB recorded in slave->slaveseldb differs from 'dictid' a
 * SELECT command is sent first and slaveseldb is updated; after that the
 * command is emitted as a multi bulk of 'argc' bulk elements.
 *
 * NOTE(review): this listing has lost some original lines (the
 * declarations of ln, li, j and selectcmd, plus else/closing-brace
 * lines); confirm the exact nesting against the complete file. */
10 void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
15 listRewind(slaves
,&li
);
16 while((ln
= listNext(&li
))) {
17 redisClient
*slave
= ln
->value
;
19 /* Don't feed slaves that are still waiting for BGSAVE to start */
20 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
22 /* Feed slaves that are waiting for the initial SYNC (so these commands
23 * are queued in the output buffer until the initial SYNC completes),
24 * or are already in sync with the master. */
25 if (slave
->slaveseldb
!= dictid
) {
/* DB ids below REDIS_SHARED_SELECT_CMDS use the preallocated shared
 * SELECT object; its refcount is incremented here and released by the
 * decrRefCount() call that follows addReply(). */
28 if (dictid
>= 0 && dictid
< REDIS_SHARED_SELECT_CMDS
) {
29 selectcmd
= shared
.select
[dictid
];
30 incrRefCount(selectcmd
);
32 selectcmd
= createObject(REDIS_STRING
,
33 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
35 addReply(slave
,selectcmd
);
36 decrRefCount(selectcmd
);
37 slave
->slaveseldb
= dictid
;
39 addReplyMultiBulkLen(slave
,argc
);
40 for (j
= 0; j
< argc
; j
++) addReplyBulk(slave
,argv
[j
]);
/* Send a textual description of a command to every client in 'monitors'.
 *
 * The line is built in an sds string that starts with "+" followed by a
 * "<seconds>.<microseconds> " timestamp obtained with gettimeofday().
 * A "[<dictid> lua] " tag is appended for clients flagged
 * REDIS_LUA_CLIENT; the "[<dictid> <ip>:<port>] " tag is built from the
 * peer address returned by anetPeerToString() on c->fd. Arguments with
 * REDIS_ENCODING_INT are printed as a quoted long, the others are
 * appended through sdscatrepr(). The line is terminated with "\r\n",
 * wrapped in a REDIS_STRING object and handed to addReply() for each
 * monitor.
 *
 * NOTE(review): this listing has lost several original lines (local
 * declarations, else/closing-brace lines and possibly other statements),
 * so the exact branch structure and the release of cmdobj should be
 * confirmed against the complete file. */
44 void replicationFeedMonitors(redisClient
*c
, list
*monitors
, int dictid
, robj
**argv
, int argc
) {
48 sds cmdrepr
= sdsnew("+");
53 gettimeofday(&tv
,NULL
);
54 cmdrepr
= sdscatprintf(cmdrepr
,"%ld.%06ld ",(long)tv
.tv_sec
,(long)tv
.tv_usec
);
55 if (c
->flags
& REDIS_LUA_CLIENT
) {
56 cmdrepr
= sdscatprintf(cmdrepr
,"[%d lua] ", dictid
);
58 anetPeerToString(c
->fd
,ip
,&port
);
59 cmdrepr
= sdscatprintf(cmdrepr
,"[%d %s:%d] ", dictid
,ip
,port
);
62 for (j
= 0; j
< argc
; j
++) {
63 if (argv
[j
]->encoding
== REDIS_ENCODING_INT
) {
64 cmdrepr
= sdscatprintf(cmdrepr
, "\"%ld\"", (long)argv
[j
]->ptr
);
66 cmdrepr
= sdscatrepr(cmdrepr
,(char*)argv
[j
]->ptr
,
67 sdslen(argv
[j
]->ptr
));
70 cmdrepr
= sdscatlen(cmdrepr
," ",1);
72 cmdrepr
= sdscatlen(cmdrepr
,"\r\n",2);
73 cmdobj
= createObject(REDIS_STRING
,cmdrepr
);
75 listRewind(monitors
,&li
);
76 while((ln
= listNext(&li
))) {
77 redisClient
*monitor
= ln
->value
;
78 addReply(monitor
,cmdobj
);
/* SYNC command: register client 'c' as a slave of this instance.
 *
 * Steps visible in this listing:
 *  - nothing is done when c already carries the REDIS_SLAVE flag;
 *  - an error is replied when this instance has a masterhost configured
 *    but repl_state is not REDIS_REPL_CONNECTED;
 *  - an error is replied when c->reply is not empty;
 *  - when an RDB child is running (rdb_child_pid != -1) server.slaves is
 *    scanned for a slave in REDIS_REPL_WAIT_BGSAVE_END: its output buffer
 *    is copied with copyClientOutputBuffer() and c takes the same state,
 *    otherwise c is put in REDIS_REPL_WAIT_BGSAVE_START;
 *  - when no child is running rdbSaveBackground() is started and c goes
 *    to REDIS_REPL_WAIT_BGSAVE_END;
 *  - finally c is flagged REDIS_SLAVE and appended to server.slaves.
 *
 * NOTE(review): this listing has lost several original lines (returns,
 * else/closing-brace lines, declarations and the end of one block
 * comment); confirm control flow against the complete file. */
83 void syncCommand(redisClient
*c
) {
84 /* Ignore SYNC if already a slave or in monitor mode */
85 if (c
->flags
& REDIS_SLAVE
) return;
87 /* Refuse SYNC requests if we are a slave but the link with our master
89 if (server
.masterhost
&& server
.repl_state
!= REDIS_REPL_CONNECTED
) {
90 addReplyError(c
,"Can't SYNC while not connected with my master");
94 /* SYNC can't be issued when the server has pending data to send to
95 * the client about already issued commands. We need a fresh reply
96 * buffer registering the differences between the BGSAVE and the current
97 * dataset, so that we can copy to other slaves if needed. */
98 if (listLength(c
->reply
) != 0) {
99 addReplyError(c
,"SYNC is invalid with pending input");
103 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
104 /* Here we need to check if there is a background saving operation
105 * in progress, or if it is required to start one */
106 if (server
.rdb_child_pid
!= -1) {
107 /* Ok a background save is in progress. Let's check if it is a good
108 * one for replication, i.e. if there is another slave that is
109 * registering differences since the server forked to save */
114 listRewind(server
.slaves
,&li
);
115 while((ln
= listNext(&li
))) {
117 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
120 /* Perfect, the server is already registering differences for
121 * another slave. Set the right state, and copy the buffer. */
122 copyClientOutputBuffer(c
,slave
);
123 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
124 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
126 /* No way, we need to wait for the next BGSAVE in order to
127 * register differences */
128 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
129 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
132 /* Ok we don't have a BGSAVE in progress, let's start one */
133 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
134 if (rdbSaveBackground(server
.rdb_filename
) != REDIS_OK
) {
135 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
136 addReplyError(c
,"Unable to perform background save");
139 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
142 c
->flags
|= REDIS_SLAVE
;
144 listAddNodeTail(server
.slaves
,c
);
/* Writable-event handler that streams the RDB file to a slave.
 *
 * 'privdata' is the slave client, 'fd' the socket to write to. On the
 * first call (slave->repldboff == 0) a "$<count>\r\n" bulk header is
 * written. Every call then seeks slave->repldbfd to repldboff, reads up
 * to REDIS_IOBUF_LEN bytes, writes them to 'fd' and advances repldboff
 * by the number of bytes actually written, so a short write is resumed
 * from the right offset on the next call.
 *
 * When repldboff reaches repldbsize the RDB descriptor is closed, the
 * slave is marked REDIS_REPL_ONLINE and the writable handler is replaced
 * by sendReplyToClient.
 *
 * NOTE(review): this listing has lost several original lines (error
 * handling after the failed read/write checks, part of the bulk header
 * format call, closing braces); confirm against the complete file. */
148 void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
149 redisClient
*slave
= privdata
;
152 char buf
[REDIS_IOBUF_LEN
];
153 ssize_t nwritten
, buflen
;
155 if (slave
->repldboff
== 0) {
156 /* Write the bulk count before transferring the DB. In theory here
157 * we don't know how much room there is in the output buffer of the
158 * socket, but in practice SO_SNDLOWAT (the minimum count for output
159 * operations) will never be smaller than the few bytes we need. */
162 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
164 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
172 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
173 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
175 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
176 (buflen
== 0) ? "premature EOF" : strerror(errno
));
180 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
181 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
186 slave
->repldboff
+= nwritten
;
187 if (slave
->repldboff
== slave
->repldbsize
) {
188 close(slave
->repldbfd
);
189 slave
->repldbfd
= -1;
190 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
191 slave
->replstate
= REDIS_REPL_ONLINE
;
192 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
193 sendReplyToClient
, slave
) == AE_ERR
) {
197 addReplySds(slave
,sdsempty());
198 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
202 /* This function is called at the end of every background saving.
203 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
204 * otherwise REDIS_ERR is passed to the function.
 *
206 * The goal of this function is to handle slaves waiting for a successful
207 * background saving in order to perform non-blocking synchronization.
 *
 * For every client in server.slaves:
 *  - a slave in REDIS_REPL_WAIT_BGSAVE_START is moved to
 *    REDIS_REPL_WAIT_BGSAVE_END;
 *  - a slave in REDIS_REPL_WAIT_BGSAVE_END gets the RDB file
 *    (server.rdb_filename) opened read-only and stat-ed, its
 *    repldboff/repldbsize initialized, its state set to
 *    REDIS_REPL_SEND_BULK and sendBulkToSlave installed as the writable
 *    handler of its socket.
 * A new rdbSaveBackground() call is also visible further down, followed
 * by a second scan of the slaves still in REDIS_REPL_WAIT_BGSAVE_START
 * when it fails.
 *
 * NOTE(review): this listing has lost several original lines (failure
 * handling, the condition guarding the new BGSAVE, closing braces);
 * confirm against the complete file. */
208 void updateSlavesWaitingBgsave(int bgsaveerr
) {
213 listRewind(server
.slaves
,&li
);
214 while((ln
= listNext(&li
))) {
215 redisClient
*slave
= ln
->value
;
217 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
219 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
220 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
221 struct redis_stat buf
;
223 if (bgsaveerr
!= REDIS_OK
) {
225 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
228 if ((slave
->repldbfd
= open(server
.rdb_filename
,O_RDONLY
)) == -1 ||
229 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
231 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
234 slave
->repldboff
= 0;
235 slave
->repldbsize
= buf
.st_size
;
236 slave
->replstate
= REDIS_REPL_SEND_BULK
;
237 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
238 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
245 if (rdbSaveBackground(server
.rdb_filename
) != REDIS_OK
) {
248 listRewind(server
.slaves
,&li
);
249 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
250 while((ln
= listNext(&li
))) {
251 redisClient
*slave
= ln
->value
;
253 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
260 /* ----------------------------------- SLAVE -------------------------------- */
262 /* Abort the async download of the bulk dataset while SYNC-ing with master */
263 void replicationAbortSyncTransfer(void) {
264 redisAssert(server
.repl_state
== REDIS_REPL_TRANSFER
);
266 aeDeleteFileEvent(server
.el
,server
.repl_transfer_s
,AE_READABLE
);
267 close(server
.repl_transfer_s
);
268 close(server
.repl_transfer_fd
);
269 unlink(server
.repl_transfer_tmpfile
);
270 zfree(server
.repl_transfer_tmpfile
);
271 server
.repl_state
= REDIS_REPL_CONNECT
;
274 /* Asynchronously read the SYNC payload we receive from a master */
/* Readable-event handler on the master socket 'fd' (slave side).
 *
 * While server.repl_transfer_left is -1 the bulk length line is read
 * with syncReadLine(): an empty line only refreshes repl_transfer_lastio,
 * a line not starting with '$' is logged as a protocol error, otherwise
 * the count is parsed with strtol() into repl_transfer_left.
 *
 * After that each call reads at most sizeof(buf) bytes from 'fd', writes
 * them to server.repl_transfer_fd and decrements repl_transfer_left.
 * When it reaches 0 the temp file is renamed to server.rdb_filename, the
 * readable handler is removed, the file is loaded with rdbLoad(), the
 * master client is created on repl_transfer_s (flagged REDIS_MASTER and
 * authenticated), repl_state becomes REDIS_REPL_CONNECTED and, when AOF
 * is not off, startAppendOnly() is retried.
 *
 * The failures visible in this listing call
 * replicationAbortSyncTransfer().
 *
 * NOTE(review): this listing has lost several original lines (local
 * declarations, returns, closing braces, the end of one block comment
 * and the label before the final abort call); confirm against the
 * complete file. */
275 void readSyncBulkPayload(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
277 ssize_t nread
, readlen
;
279 REDIS_NOTUSED(privdata
);
282 /* If repl_transfer_left == -1 we still have to read the bulk length
283 * from the master reply. */
284 if (server
.repl_transfer_left
== -1) {
285 if (syncReadLine(fd
,buf
,1024,server
.repl_syncio_timeout
) == -1) {
286 redisLog(REDIS_WARNING
,
287 "I/O error reading bulk count from MASTER: %s",
293 redisLog(REDIS_WARNING
,
294 "MASTER aborted replication with an error: %s",
297 } else if (buf
[0] == '\0') {
298 /* At this stage just a newline works as a PING in order to keep
299 * the connection alive. So we refresh our last interaction
301 server
.repl_transfer_lastio
= server
.unixtime
;
303 } else if (buf
[0] != '$') {
304 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
307 server
.repl_transfer_left
= strtol(buf
+1,NULL
,10);
308 redisLog(REDIS_NOTICE
,
309 "MASTER <-> SLAVE sync: receiving %ld bytes from master",
310 server
.repl_transfer_left
);
315 readlen
= (server
.repl_transfer_left
< (signed)sizeof(buf
)) ?
316 server
.repl_transfer_left
: (signed)sizeof(buf
);
317 nread
= read(fd
,buf
,readlen
);
319 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
320 (nread
== -1) ? strerror(errno
) : "connection lost");
321 replicationAbortSyncTransfer();
324 server
.repl_transfer_lastio
= server
.unixtime
;
325 if (write(server
.repl_transfer_fd
,buf
,nread
) != nread
) {
326 redisLog(REDIS_WARNING
,"Write error or short write writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
329 server
.repl_transfer_left
-= nread
;
330 /* Check if the transfer is now complete */
331 if (server
.repl_transfer_left
== 0) {
332 if (rename(server
.repl_transfer_tmpfile
,server
.rdb_filename
) == -1) {
333 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
334 replicationAbortSyncTransfer();
337 redisLog(REDIS_NOTICE
, "MASTER <-> SLAVE sync: Loading DB in memory");
339 /* Before loading the DB into memory we need to delete the readable
340 * handler, otherwise it will get called recursively since
341 * rdbLoad() will call the event loop to process events from time to
342 * time for non blocking loading. */
343 aeDeleteFileEvent(server
.el
,server
.repl_transfer_s
,AE_READABLE
);
344 if (rdbLoad(server
.rdb_filename
) != REDIS_OK
) {
345 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
346 replicationAbortSyncTransfer();
349 /* Final setup of the connected slave <- master link */
350 zfree(server
.repl_transfer_tmpfile
);
351 close(server
.repl_transfer_fd
);
352 server
.master
= createClient(server
.repl_transfer_s
);
353 server
.master
->flags
|= REDIS_MASTER
;
354 server
.master
->authenticated
= 1;
355 server
.repl_state
= REDIS_REPL_CONNECTED
;
356 redisLog(REDIS_NOTICE
, "MASTER <-> SLAVE sync: Finished with success");
357 /* Restart the AOF subsystem now that we finished the sync. This
358 * will trigger an AOF rewrite, and when done will start appending
359 * to the new file. */
360 if (server
.aof_state
!= REDIS_AOF_OFF
) {
364 while (retry
-- && startAppendOnly() == REDIS_ERR
) {
365 redisLog(REDIS_WARNING
,"Failed enabling the AOF after successful master synchrnization! Trying it again in one second.");
369 redisLog(REDIS_WARNING
,"FATAL: this slave instance finished the synchronization with its master, but the AOF can't be turned on. Exiting now.");
378 replicationAbortSyncTransfer();
/* Event handler registered by connectWithMaster() on the master socket
 * 'fd'; it runs when the non-blocking connect fires.
 *
 * Steps visible in this listing:
 *  - special handling when repl_state is REDIS_REPL_NONE (the instance
 *    was turned into a master in the meantime);
 *  - the readable/writable event on 'fd' is removed;
 *  - when server.masterauth is set an "AUTH <password>" command is sent
 *    with syncWrite() and the reply line is read with syncReadLine();
 *  - the SYNC command is written;
 *  - a temp file named "temp-<unixtime>.<pid>.rdb" is created with
 *    O_CREAT|O_WRONLY|O_EXCL;
 *  - readSyncBulkPayload is installed as the readable handler and the
 *    transfer state is initialized (repl_state REDIS_REPL_TRANSFER,
 *    repl_transfer_left -1, repl_transfer_fd, repl_transfer_lastio,
 *    repl_transfer_tmpfile).
 * The final assignment of REDIS_REPL_CONNECT is presumably the shared
 * error path; the label itself is not visible here.
 *
 * NOTE(review): this listing has lost several original lines (local
 * declarations, the retry loop header, gotos/returns, closing braces);
 * confirm against the complete file. */
382 void syncWithMaster(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
383 char buf
[1024], tmpfile
[256];
384 int dfd
, maxtries
= 5;
386 REDIS_NOTUSED(privdata
);
389 /* If this event fired after the user turned the instance into a master
390 * with SLAVEOF NO ONE we must just return ASAP. */
391 if (server
.repl_state
== REDIS_REPL_NONE
) {
396 redisLog(REDIS_NOTICE
,"Non blocking connect for SYNC fired the event.");
397 /* This event should only be triggered once since it is used to have a
398 * non-blocking connect(2) to the master. It has been triggered when this
399 * function is called, so we can delete it. */
400 aeDeleteFileEvent(server
.el
,fd
,AE_READABLE
|AE_WRITABLE
);
402 /* AUTH with the master if required. */
403 if(server
.masterauth
) {
407 authlen
= snprintf(authcmd
,sizeof(authcmd
),"AUTH %s\r\n",server
.masterauth
);
408 if (syncWrite(fd
,authcmd
,authlen
,server
.repl_syncio_timeout
) == -1) {
409 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
413 /* Read the AUTH result. */
414 if (syncReadLine(fd
,buf
,1024,server
.repl_syncio_timeout
) == -1) {
415 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
420 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
425 /* Issue the SYNC command */
426 if (syncWrite(fd
,"SYNC \r\n",7,server
.repl_syncio_timeout
) == -1) {
427 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
432 /* Prepare a suitable temp file for bulk transfer */
434 snprintf(tmpfile
,256,
435 "temp-%d.%ld.rdb",(int)server
.unixtime
,(long int)getpid());
436 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
437 if (dfd
!= -1) break;
441 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
445 /* Setup the non blocking download of the bulk file. */
446 if (aeCreateFileEvent(server
.el
,fd
, AE_READABLE
,readSyncBulkPayload
,NULL
)
449 redisLog(REDIS_WARNING
,"Can't create readable event for SYNC");
453 server
.repl_state
= REDIS_REPL_TRANSFER
;
454 server
.repl_transfer_left
= -1;
455 server
.repl_transfer_fd
= dfd
;
456 server
.repl_transfer_lastio
= server
.unixtime
;
457 server
.repl_transfer_tmpfile
= zstrdup(tmpfile
);
461 server
.repl_state
= REDIS_REPL_CONNECT
;
/* Start a non-blocking TCP connection to server.masterhost on
 * server.masterport and register syncWithMaster as the handler for
 * readable/writable events on the new socket.
 *
 * In the part visible here, once the event is registered
 * repl_transfer_lastio is refreshed, the socket is stored in
 * server.repl_transfer_s and repl_state becomes REDIS_REPL_CONNECTING.
 * The caller in replicationCron() compares the return value with
 * REDIS_OK.
 *
 * NOTE(review): this listing has lost several original lines (the fd
 * declaration, the failure checks and the return statements); confirm
 * against the complete file. */
466 int connectWithMaster(void) {
469 fd
= anetTcpNonBlockConnect(NULL
,server
.masterhost
,server
.masterport
);
471 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
476 if (aeCreateFileEvent(server
.el
,fd
,AE_READABLE
|AE_WRITABLE
,syncWithMaster
,NULL
) ==
480 redisLog(REDIS_WARNING
,"Can't create readable event for SYNC");
484 server
.repl_transfer_lastio
= server
.unixtime
;
485 server
.repl_transfer_s
= fd
;
486 server
.repl_state
= REDIS_REPL_CONNECTING
;
490 /* This function can be called when a non blocking connection is currently
491 * in progress to undo it. */
492 void undoConnectWithMaster(void) {
493 int fd
= server
.repl_transfer_s
;
495 redisAssert(server
.repl_state
== REDIS_REPL_CONNECTING
);
496 aeDeleteFileEvent(server
.el
,fd
,AE_READABLE
|AE_WRITABLE
);
498 server
.repl_transfer_s
= -1;
499 server
.repl_state
= REDIS_REPL_CONNECT
;
/* SLAVEOF command.
 *
 * "SLAVEOF no one" (matched case-insensitively on argv[1] and argv[2]):
 * when a masterhost is configured it is released, the master client is
 * freed, an in-progress bulk transfer is aborted or an in-progress
 * connect is undone, and repl_state becomes REDIS_REPL_NONE.
 *
 * Otherwise argv[1] is the master host and argv[2] is parsed as the port
 * with getLongFromObjectOrReply(). When host and port match the current
 * master a "+OK Already connected to specified master" reply is sent.
 * Else masterhost/masterport are replaced, the current master client is
 * freed, the attached slaves are disconnected, an in-progress transfer
 * is aborted and repl_state becomes REDIS_REPL_CONNECT.
 *
 * The last visible statement replies with shared.ok.
 *
 * NOTE(review): this listing has lost several original lines (the port
 * declaration, else/return lines, closing braces); confirm control flow
 * against the complete file. */
502 void slaveofCommand(redisClient
*c
) {
503 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
504 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
505 if (server
.masterhost
) {
506 sdsfree(server
.masterhost
);
507 server
.masterhost
= NULL
;
508 if (server
.master
) freeClient(server
.master
);
509 if (server
.repl_state
== REDIS_REPL_TRANSFER
)
510 replicationAbortSyncTransfer();
511 else if (server
.repl_state
== REDIS_REPL_CONNECTING
)
512 undoConnectWithMaster();
513 server
.repl_state
= REDIS_REPL_NONE
;
514 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
519 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &port
, NULL
) != REDIS_OK
))
522 /* Check if we are already attached to the specified master */
523 if (server
.masterhost
&& !strcasecmp(server
.masterhost
,c
->argv
[1]->ptr
)
524 && server
.masterport
== port
) {
525 redisLog(REDIS_NOTICE
,"SLAVE OF would result into synchronization with the master we are already connected with. No operation performed.");
526 addReplySds(c
,sdsnew("+OK Already connected to specified master\r\n"));
529 /* There was no previous master or the user specified a different one,
530 * we can continue. */
531 sdsfree(server
.masterhost
);
532 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
533 server
.masterport
= port
;
534 if (server
.master
) freeClient(server
.master
);
535 disconnectSlaves(); /* Force our slaves to resync with us as well. */
536 if (server
.repl_state
== REDIS_REPL_TRANSFER
)
537 replicationAbortSyncTransfer();
538 server
.repl_state
= REDIS_REPL_CONNECT
;
539 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
540 server
.masterhost
, server
.masterport
);
542 addReply(c
,shared
.ok
);
545 /* --------------------------- REPLICATION CRON ---------------------------- */
547 void replicationCron(void) {
548 /* Non blocking connection timeout? */
549 if (server
.masterhost
&& server
.repl_state
== REDIS_REPL_CONNECTING
&&
550 (time(NULL
)-server
.repl_transfer_lastio
) > server
.repl_timeout
)
552 redisLog(REDIS_WARNING
,"Timeout connecting to the MASTER...");
553 undoConnectWithMaster();
556 /* Bulk transfer I/O timeout? */
557 if (server
.masterhost
&& server
.repl_state
== REDIS_REPL_TRANSFER
&&
558 (time(NULL
)-server
.repl_transfer_lastio
) > server
.repl_timeout
)
560 redisLog(REDIS_WARNING
,"Timeout receiving bulk data from MASTER...");
561 replicationAbortSyncTransfer();
564 /* Timed out master when we are an already connected slave? */
565 if (server
.masterhost
&& server
.repl_state
== REDIS_REPL_CONNECTED
&&
566 (time(NULL
)-server
.master
->lastinteraction
) > server
.repl_timeout
)
568 redisLog(REDIS_WARNING
,"MASTER time out: no data nor PING received...");
569 freeClient(server
.master
);
572 /* Check if we should connect to a MASTER */
573 if (server
.repl_state
== REDIS_REPL_CONNECT
) {
574 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
575 if (connectWithMaster() == REDIS_OK
) {
576 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync started");
580 /* If we have attached slaves, PING them from time to time.
581 * So slaves can implement an explicit timeout to masters, and will
582 * be able to detect a link disconnection even if the TCP connection
583 * will not actually go down. */
584 if (!(server
.cronloops
% (server
.repl_ping_slave_period
*10))) {
588 listRewind(server
.slaves
,&li
);
589 while((ln
= listNext(&li
))) {
590 redisClient
*slave
= ln
->value
;
592 /* Don't ping slaves that are in the middle of a bulk transfer
593 * with the master for first synchronization. */
594 if (slave
->replstate
== REDIS_REPL_SEND_BULK
) continue;
595 if (slave
->replstate
== REDIS_REPL_ONLINE
) {
596 /* If the slave is online send a normal ping */
597 addReplySds(slave
,sdsnew("*1\r\n$4\r\nPING\r\n"));
599 /* Otherwise we are in the pre-synchronization stage.
600 * Just a newline will do the work of refreshing the
601 * connection last interaction time, and at the same time
602 * we'll be sure that being a single char there are no
603 * short-write problems. */
604 if (write(slave
->fd
, "\n", 1) == -1) {
605 /* Don't worry, it's just a ping. */