]> git.saurik.com Git - redis.git/blob - src/replication.c
Merge remote branch 'pietern/intset-split'
[redis.git] / src / replication.c
1 #include "redis.h"
2
3 #include <sys/time.h>
4 #include <unistd.h>
5 #include <fcntl.h>
6 #include <sys/stat.h>
7
8 void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) {
9 listNode *ln;
10 listIter li;
11 int outc = 0, j;
12 robj **outv;
13 /* We need 1+(ARGS*3) objects since commands are using the new protocol
14 * and we one 1 object for the first "*<count>\r\n" multibulk count, then
15 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
16 robj *static_outv[REDIS_STATIC_ARGS*3+1];
17 robj *lenobj;
18
19 if (argc <= REDIS_STATIC_ARGS) {
20 outv = static_outv;
21 } else {
22 outv = zmalloc(sizeof(robj*)*(argc*3+1));
23 }
24
25 lenobj = createObject(REDIS_STRING,
26 sdscatprintf(sdsempty(), "*%d\r\n", argc));
27 lenobj->refcount = 0;
28 outv[outc++] = lenobj;
29 for (j = 0; j < argc; j++) {
30 lenobj = createObject(REDIS_STRING,
31 sdscatprintf(sdsempty(),"$%lu\r\n",
32 (unsigned long) stringObjectLen(argv[j])));
33 lenobj->refcount = 0;
34 outv[outc++] = lenobj;
35 outv[outc++] = argv[j];
36 outv[outc++] = shared.crlf;
37 }
38
39 /* Increment all the refcounts at start and decrement at end in order to
40 * be sure to free objects if there is no slave in a replication state
41 * able to be feed with commands */
42 for (j = 0; j < outc; j++) incrRefCount(outv[j]);
43 listRewind(slaves,&li);
44 while((ln = listNext(&li))) {
45 redisClient *slave = ln->value;
46
47 /* Don't feed slaves that are still waiting for BGSAVE to start */
48 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) continue;
49
50 /* Feed all the other slaves, MONITORs and so on */
51 if (slave->slaveseldb != dictid) {
52 robj *selectcmd;
53
54 switch(dictid) {
55 case 0: selectcmd = shared.select0; break;
56 case 1: selectcmd = shared.select1; break;
57 case 2: selectcmd = shared.select2; break;
58 case 3: selectcmd = shared.select3; break;
59 case 4: selectcmd = shared.select4; break;
60 case 5: selectcmd = shared.select5; break;
61 case 6: selectcmd = shared.select6; break;
62 case 7: selectcmd = shared.select7; break;
63 case 8: selectcmd = shared.select8; break;
64 case 9: selectcmd = shared.select9; break;
65 default:
66 selectcmd = createObject(REDIS_STRING,
67 sdscatprintf(sdsempty(),"select %d\r\n",dictid));
68 selectcmd->refcount = 0;
69 break;
70 }
71 addReply(slave,selectcmd);
72 slave->slaveseldb = dictid;
73 }
74 for (j = 0; j < outc; j++) addReply(slave,outv[j]);
75 }
76 for (j = 0; j < outc; j++) decrRefCount(outv[j]);
77 if (outv != static_outv) zfree(outv);
78 }
79
80 void replicationFeedMonitors(list *monitors, int dictid, robj **argv, int argc) {
81 listNode *ln;
82 listIter li;
83 int j;
84 sds cmdrepr = sdsnew("+");
85 robj *cmdobj;
86 struct timeval tv;
87
88 gettimeofday(&tv,NULL);
89 cmdrepr = sdscatprintf(cmdrepr,"%ld.%ld ",(long)tv.tv_sec,(long)tv.tv_usec);
90 if (dictid != 0) cmdrepr = sdscatprintf(cmdrepr,"(db %d) ", dictid);
91
92 for (j = 0; j < argc; j++) {
93 if (argv[j]->encoding == REDIS_ENCODING_INT) {
94 cmdrepr = sdscatprintf(cmdrepr, "\"%ld\"", (long)argv[j]->ptr);
95 } else {
96 cmdrepr = sdscatrepr(cmdrepr,(char*)argv[j]->ptr,
97 sdslen(argv[j]->ptr));
98 }
99 if (j != argc-1)
100 cmdrepr = sdscatlen(cmdrepr," ",1);
101 }
102 cmdrepr = sdscatlen(cmdrepr,"\r\n",2);
103 cmdobj = createObject(REDIS_STRING,cmdrepr);
104
105 listRewind(monitors,&li);
106 while((ln = listNext(&li))) {
107 redisClient *monitor = ln->value;
108 addReply(monitor,cmdobj);
109 }
110 decrRefCount(cmdobj);
111 }
112
113 int syncWrite(int fd, char *ptr, ssize_t size, int timeout) {
114 ssize_t nwritten, ret = size;
115 time_t start = time(NULL);
116
117 timeout++;
118 while(size) {
119 if (aeWait(fd,AE_WRITABLE,1000) & AE_WRITABLE) {
120 nwritten = write(fd,ptr,size);
121 if (nwritten == -1) return -1;
122 ptr += nwritten;
123 size -= nwritten;
124 }
125 if ((time(NULL)-start) > timeout) {
126 errno = ETIMEDOUT;
127 return -1;
128 }
129 }
130 return ret;
131 }
132
133 int syncRead(int fd, char *ptr, ssize_t size, int timeout) {
134 ssize_t nread, totread = 0;
135 time_t start = time(NULL);
136
137 timeout++;
138 while(size) {
139 if (aeWait(fd,AE_READABLE,1000) & AE_READABLE) {
140 nread = read(fd,ptr,size);
141 if (nread == -1) return -1;
142 ptr += nread;
143 size -= nread;
144 totread += nread;
145 }
146 if ((time(NULL)-start) > timeout) {
147 errno = ETIMEDOUT;
148 return -1;
149 }
150 }
151 return totread;
152 }
153
/* Read a line from 'fd' into the buffer 'ptr' of 'size' bytes, one byte at
 * a time via syncRead() (the timeout applies to every single byte).
 *
 * The line terminator ("\n" or "\r\n") is not stored and the buffer is
 * always null terminated. At most size-1 characters are stored: if the
 * buffer fills up before a newline is seen, the function returns what was
 * read so far.
 *
 * Returns the number of characters stored (terminator excluded), or -1 on
 * error. A non-positive 'size' is rejected with errno set to EINVAL. */
int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
    ssize_t nread = 0;

    if (size <= 0) {
        /* There is not even room for the null terminator. */
        errno = EINVAL;
        return -1;
    }
    *ptr = '\0';

    size--; /* Reserve one byte for the null terminator. */
    while(size) {
        char c;

        if (syncRead(fd,&c,1,timeout) == -1) return -1;
        if (c == '\n') {
            *ptr = '\0';
            /* Strip the CR of a CRLF terminated line. */
            if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
            return nread;
        } else {
            *ptr++ = c;
            *ptr = '\0';
            nread++;
        }
        /* Account for the byte just stored, so that an overlong line can
         * never be written past the end of the buffer. */
        size--;
    }
    return nread;
}
174
175 void syncCommand(redisClient *c) {
176 /* ignore SYNC if aleady slave or in monitor mode */
177 if (c->flags & REDIS_SLAVE) return;
178
179 /* Refuse SYNC requests if we are a slave but the link with our master
180 * is not ok... */
181 if (server.masterhost && server.replstate != REDIS_REPL_CONNECTED) {
182 addReplySds(c,sdsnew("-ERR Can't SYNC while not connected with my master\r\n"));
183 return;
184 }
185
186 /* SYNC can't be issued when the server has pending data to send to
187 * the client about already issued commands. We need a fresh reply
188 * buffer registering the differences between the BGSAVE and the current
189 * dataset, so that we can copy to other slaves if needed. */
190 if (listLength(c->reply) != 0) {
191 addReplySds(c,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
192 return;
193 }
194
195 redisLog(REDIS_NOTICE,"Slave ask for synchronization");
196 /* Here we need to check if there is a background saving operation
197 * in progress, or if it is required to start one */
198 if (server.bgsavechildpid != -1) {
199 /* Ok a background save is in progress. Let's check if it is a good
200 * one for replication, i.e. if there is another slave that is
201 * registering differences since the server forked to save */
202 redisClient *slave;
203 listNode *ln;
204 listIter li;
205
206 listRewind(server.slaves,&li);
207 while((ln = listNext(&li))) {
208 slave = ln->value;
209 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) break;
210 }
211 if (ln) {
212 /* Perfect, the server is already registering differences for
213 * another slave. Set the right state, and copy the buffer. */
214 listRelease(c->reply);
215 c->reply = listDup(slave->reply);
216 c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
217 redisLog(REDIS_NOTICE,"Waiting for end of BGSAVE for SYNC");
218 } else {
219 /* No way, we need to wait for the next BGSAVE in order to
220 * register differences */
221 c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
222 redisLog(REDIS_NOTICE,"Waiting for next BGSAVE for SYNC");
223 }
224 } else {
225 /* Ok we don't have a BGSAVE in progress, let's start one */
226 redisLog(REDIS_NOTICE,"Starting BGSAVE for SYNC");
227 if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
228 redisLog(REDIS_NOTICE,"Replication failed, can't BGSAVE");
229 addReplySds(c,sdsnew("-ERR Unalbe to perform background save\r\n"));
230 return;
231 }
232 c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
233 }
234 c->repldbfd = -1;
235 c->flags |= REDIS_SLAVE;
236 c->slaveseldb = 0;
237 listAddNodeTail(server.slaves,c);
238 return;
239 }
240
241 void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
242 redisClient *slave = privdata;
243 REDIS_NOTUSED(el);
244 REDIS_NOTUSED(mask);
245 char buf[REDIS_IOBUF_LEN];
246 ssize_t nwritten, buflen;
247
248 if (slave->repldboff == 0) {
249 /* Write the bulk write count before to transfer the DB. In theory here
250 * we don't know how much room there is in the output buffer of the
251 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
252 * operations) will never be smaller than the few bytes we need. */
253 sds bulkcount;
254
255 bulkcount = sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
256 slave->repldbsize);
257 if (write(fd,bulkcount,sdslen(bulkcount)) != (signed)sdslen(bulkcount))
258 {
259 sdsfree(bulkcount);
260 freeClient(slave);
261 return;
262 }
263 sdsfree(bulkcount);
264 }
265 lseek(slave->repldbfd,slave->repldboff,SEEK_SET);
266 buflen = read(slave->repldbfd,buf,REDIS_IOBUF_LEN);
267 if (buflen <= 0) {
268 redisLog(REDIS_WARNING,"Read error sending DB to slave: %s",
269 (buflen == 0) ? "premature EOF" : strerror(errno));
270 freeClient(slave);
271 return;
272 }
273 if ((nwritten = write(fd,buf,buflen)) == -1) {
274 redisLog(REDIS_VERBOSE,"Write error sending DB to slave: %s",
275 strerror(errno));
276 freeClient(slave);
277 return;
278 }
279 slave->repldboff += nwritten;
280 if (slave->repldboff == slave->repldbsize) {
281 close(slave->repldbfd);
282 slave->repldbfd = -1;
283 aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
284 slave->replstate = REDIS_REPL_ONLINE;
285 if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE,
286 sendReplyToClient, slave) == AE_ERR) {
287 freeClient(slave);
288 return;
289 }
290 addReplySds(slave,sdsempty());
291 redisLog(REDIS_NOTICE,"Synchronization with slave succeeded");
292 }
293 }
294
295 /* This function is called at the end of every backgrond saving.
296 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
297 * otherwise REDIS_ERR is passed to the function.
298 *
299 * The goal of this function is to handle slaves waiting for a successful
300 * background saving in order to perform non-blocking synchronization. */
301 void updateSlavesWaitingBgsave(int bgsaveerr) {
302 listNode *ln;
303 int startbgsave = 0;
304 listIter li;
305
306 listRewind(server.slaves,&li);
307 while((ln = listNext(&li))) {
308 redisClient *slave = ln->value;
309
310 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) {
311 startbgsave = 1;
312 slave->replstate = REDIS_REPL_WAIT_BGSAVE_END;
313 } else if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) {
314 struct redis_stat buf;
315
316 if (bgsaveerr != REDIS_OK) {
317 freeClient(slave);
318 redisLog(REDIS_WARNING,"SYNC failed. BGSAVE child returned an error");
319 continue;
320 }
321 if ((slave->repldbfd = open(server.dbfilename,O_RDONLY)) == -1 ||
322 redis_fstat(slave->repldbfd,&buf) == -1) {
323 freeClient(slave);
324 redisLog(REDIS_WARNING,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno));
325 continue;
326 }
327 slave->repldboff = 0;
328 slave->repldbsize = buf.st_size;
329 slave->replstate = REDIS_REPL_SEND_BULK;
330 aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
331 if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE, sendBulkToSlave, slave) == AE_ERR) {
332 freeClient(slave);
333 continue;
334 }
335 }
336 }
337 if (startbgsave) {
338 if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
339 listIter li;
340
341 listRewind(server.slaves,&li);
342 redisLog(REDIS_WARNING,"SYNC failed. BGSAVE failed");
343 while((ln = listNext(&li))) {
344 redisClient *slave = ln->value;
345
346 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START)
347 freeClient(slave);
348 }
349 }
350 }
351 }
352
353 int syncWithMaster(void) {
354 char buf[1024], tmpfile[256], authcmd[1024];
355 long dumpsize;
356 int fd = anetTcpConnect(NULL,server.masterhost,server.masterport);
357 int dfd, maxtries = 5;
358
359 if (fd == -1) {
360 redisLog(REDIS_WARNING,"Unable to connect to MASTER: %s",
361 strerror(errno));
362 return REDIS_ERR;
363 }
364
365 /* AUTH with the master if required. */
366 if(server.masterauth) {
367 snprintf(authcmd, 1024, "AUTH %s\r\n", server.masterauth);
368 if (syncWrite(fd, authcmd, strlen(server.masterauth)+7, 5) == -1) {
369 close(fd);
370 redisLog(REDIS_WARNING,"Unable to AUTH to MASTER: %s",
371 strerror(errno));
372 return REDIS_ERR;
373 }
374 /* Read the AUTH result. */
375 if (syncReadLine(fd,buf,1024,3600) == -1) {
376 close(fd);
377 redisLog(REDIS_WARNING,"I/O error reading auth result from MASTER: %s",
378 strerror(errno));
379 return REDIS_ERR;
380 }
381 if (buf[0] != '+') {
382 close(fd);
383 redisLog(REDIS_WARNING,"Cannot AUTH to MASTER, is the masterauth password correct?");
384 return REDIS_ERR;
385 }
386 }
387
388 /* Issue the SYNC command */
389 if (syncWrite(fd,"SYNC \r\n",7,5) == -1) {
390 close(fd);
391 redisLog(REDIS_WARNING,"I/O error writing to MASTER: %s",
392 strerror(errno));
393 return REDIS_ERR;
394 }
395 /* Read the bulk write count */
396 if (syncReadLine(fd,buf,1024,3600) == -1) {
397 close(fd);
398 redisLog(REDIS_WARNING,"I/O error reading bulk count from MASTER: %s",
399 strerror(errno));
400 return REDIS_ERR;
401 }
402 if (buf[0] == '-') {
403 close(fd);
404 redisLog(REDIS_WARNING,"MASTER aborted replication with an error: %s",
405 buf+1);
406 return REDIS_ERR;
407 } else if (buf[0] != '$') {
408 close(fd);
409 redisLog(REDIS_WARNING,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
410 return REDIS_ERR;
411 }
412 dumpsize = strtol(buf+1,NULL,10);
413 redisLog(REDIS_NOTICE,"Receiving %ld bytes data dump from MASTER",dumpsize);
414 /* Read the bulk write data on a temp file */
415 while(maxtries--) {
416 snprintf(tmpfile,256,
417 "temp-%d.%ld.rdb",(int)time(NULL),(long int)getpid());
418 dfd = open(tmpfile,O_CREAT|O_WRONLY|O_EXCL,0644);
419 if (dfd != -1) break;
420 sleep(1);
421 }
422 if (dfd == -1) {
423 close(fd);
424 redisLog(REDIS_WARNING,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno));
425 return REDIS_ERR;
426 }
427 while(dumpsize) {
428 int nread, nwritten;
429
430 nread = read(fd,buf,(dumpsize < 1024)?dumpsize:1024);
431 if (nread <= 0) {
432 redisLog(REDIS_WARNING,"I/O error trying to sync with MASTER: %s",
433 (nread == -1) ? strerror(errno) : "connection lost");
434 close(fd);
435 close(dfd);
436 return REDIS_ERR;
437 }
438 nwritten = write(dfd,buf,nread);
439 if (nwritten == -1) {
440 redisLog(REDIS_WARNING,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno));
441 close(fd);
442 close(dfd);
443 return REDIS_ERR;
444 }
445 dumpsize -= nread;
446 }
447 close(dfd);
448 if (rename(tmpfile,server.dbfilename) == -1) {
449 redisLog(REDIS_WARNING,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno));
450 unlink(tmpfile);
451 close(fd);
452 return REDIS_ERR;
453 }
454 emptyDb();
455 if (rdbLoad(server.dbfilename) != REDIS_OK) {
456 redisLog(REDIS_WARNING,"Failed trying to load the MASTER synchronization DB from disk");
457 close(fd);
458 return REDIS_ERR;
459 }
460 server.master = createClient(fd);
461 server.master->flags |= REDIS_MASTER;
462 server.master->authenticated = 1;
463 server.replstate = REDIS_REPL_CONNECTED;
464 return REDIS_OK;
465 }
466
467 void slaveofCommand(redisClient *c) {
468 if (!strcasecmp(c->argv[1]->ptr,"no") &&
469 !strcasecmp(c->argv[2]->ptr,"one")) {
470 if (server.masterhost) {
471 sdsfree(server.masterhost);
472 server.masterhost = NULL;
473 if (server.master) freeClient(server.master);
474 server.replstate = REDIS_REPL_NONE;
475 redisLog(REDIS_NOTICE,"MASTER MODE enabled (user request)");
476 }
477 } else {
478 sdsfree(server.masterhost);
479 server.masterhost = sdsdup(c->argv[1]->ptr);
480 server.masterport = atoi(c->argv[2]->ptr);
481 if (server.master) freeClient(server.master);
482 server.replstate = REDIS_REPL_CONNECT;
483 redisLog(REDIS_NOTICE,"SLAVE OF %s:%d enabled (user request)",
484 server.masterhost, server.masterport);
485 }
486 addReply(c,shared.ok);
487 }