X-Git-Url: https://git.saurik.com/redis.git/blobdiff_plain/4b72c5617f1b9a53daad5bb58bfc7faf05c8169a..1793752d97d72c82ce237b461165d5a06c44587e:/src/cluster.c?ds=sidebyside diff --git a/src/cluster.c b/src/cluster.c index 1ffe0cdc..6d117aca 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -11,6 +11,9 @@ void clusterSendFail(char *nodename); void clusterUpdateState(void); int clusterNodeGetSlotBit(clusterNode *n, int slot); sds clusterGenNodesDescription(void); +clusterNode *clusterLookupNode(char *name); +int clusterNodeAddSlave(clusterNode *master, clusterNode *slave); +int clusterAddSlot(clusterNode *n, int slot); /* ----------------------------------------------------------------------------- * Initialization @@ -34,17 +37,103 @@ void clusterGetRandomName(char *p) { int clusterLoadConfig(char *filename) { FILE *fp = fopen(filename,"r"); + char *line; + int maxline, j; - return REDIS_ERR; if (fp == NULL) return REDIS_ERR; + + /* Parse the file. Note that single liens of the cluster config file can + * be really long as they include all the hash slots of the node. + * This means in the worst possible case REDIS_CLUSTER_SLOTS/2 integers. + * To simplify we allocate 1024+REDIS_CLUSTER_SLOTS*16 bytes per line. */ + maxline = 1024+REDIS_CLUSTER_SLOTS*16; + line = zmalloc(maxline); + while(fgets(line,maxline,fp) != NULL) { + int argc; + sds *argv = sdssplitargs(line,&argc); + clusterNode *n, *master; + char *p, *s; + + /* Create this node if it does not exist */ + n = clusterLookupNode(argv[0]); + if (!n) { + n = createClusterNode(argv[0],0); + clusterAddNode(n); + } + /* Address and port */ + if ((p = strchr(argv[1],':')) == NULL) goto fmterr; + *p = '\0'; + memcpy(n->ip,argv[1],strlen(argv[1])+1); + n->port = atoi(p+1); + + /* Parse flags */ + p = s = argv[2]; + while(p) { + p = strchr(s,','); + if (p) *p = '\0'; + if (!strcasecmp(s,"myself")) { + redisAssert(server.cluster.myself == NULL); + server.cluster.myself = n; + n->flags |= REDIS_NODE_MYSELF; + } else if (!strcasecmp(s,"master")) { + n->flags |= REDIS_NODE_MASTER; + } else if (!strcasecmp(s,"slave")) { + n->flags |= REDIS_NODE_SLAVE; + } else if (!strcasecmp(s,"fail?")) { + n->flags |= REDIS_NODE_PFAIL; + } else if (!strcasecmp(s,"fail")) { + n->flags |= REDIS_NODE_FAIL; + } else if (!strcasecmp(s,"handshake")) { + n->flags |= REDIS_NODE_HANDSHAKE; + } else if (!strcasecmp(s,"noaddr")) { + n->flags |= REDIS_NODE_NOADDR; + } else if (!strcasecmp(s,"noflags")) { + /* nothing to do */ + } else { + redisPanic("Unknown flag in redis cluster config file"); + } + if (p) s = p+1; + } + + /* Get master if any. Set the master and populate master's + * slave list. */ + if (argv[3][0] != '-') { + master = clusterLookupNode(argv[3]); + if (!master) { + master = createClusterNode(argv[3],0); + clusterAddNode(master); + } + n->slaveof = master; + clusterNodeAddSlave(master,n); + } + + /* Populate hash slots served by this instance. */ + for (j = 7; j < argc; j++) { + int start, stop; + + if ((p = strchr(argv[j],'-')) != NULL) { + *p = '\0'; + start = atoi(argv[j]); + stop = atoi(p+1); + } else { + start = stop = atoi(argv[j]); + } + while(start <= stop) clusterAddSlot(n, start++); + } + + sdssplitargs_free(argv,argc); + } + zfree(line); fclose(fp); + /* Config sanity check */ + redisAssert(server.cluster.myself != NULL); redisLog(REDIS_NOTICE,"Node configuration loaded, I'm %.40s", server.cluster.myself->name); return REDIS_OK; fmterr: - redisLog(REDIS_WARNING,"Unrecovarable error: corrupted redis-cluster.conf file."); + redisLog(REDIS_WARNING,"Unrecovarable error: corrupted cluster config file."); fclose(fp); exit(1); } @@ -53,11 +142,12 @@ fmterr: * * This function writes the node config and returns 0, on error -1 * is returned. */ -int clusterSaveConfig(char *filename) { +int clusterSaveConfig(void) { sds ci = clusterGenNodesDescription(); int fd; - if ((fd = open(filename,O_WRONLY|O_CREAT,0644)) == -1) goto err; + if ((fd = open(server.cluster.configfile,O_WRONLY|O_CREAT|O_TRUNC,0644)) + == -1) goto err; if (write(fd,ci,sdslen(ci)) != (ssize_t)sdslen(ci)) goto err; close(fd); sdsfree(ci); @@ -68,10 +158,17 @@ err: return -1; } +void clusterSaveConfigOrDie(void) { + if (clusterSaveConfig() == -1) { + redisLog(REDIS_WARNING,"Fatal: can't update cluster config file."); + exit(1); + } +} + void clusterInit(void) { int saveconf = 0; - server.cluster.myself = createClusterNode(NULL,REDIS_NODE_MYSELF); + server.cluster.myself = NULL; server.cluster.state = REDIS_CLUSTER_FAIL; server.cluster.nodes = dictCreate(&clusterNodesDictType,NULL); server.cluster.node_timeout = 15; @@ -81,20 +178,16 @@ void clusterInit(void) { sizeof(server.cluster.importing_slots_from)); memset(server.cluster.slots,0, sizeof(server.cluster.slots)); - if (clusterLoadConfig("redis-cluster.conf") == REDIS_ERR) { + if (clusterLoadConfig(server.cluster.configfile) == REDIS_ERR) { /* No configuration found. We will just use the random name provided * by the createClusterNode() function. */ + server.cluster.myself = createClusterNode(NULL,REDIS_NODE_MYSELF); redisLog(REDIS_NOTICE,"No cluster configuration found, I'm %.40s", server.cluster.myself->name); + clusterAddNode(server.cluster.myself); saveconf = 1; } - clusterAddNode(server.cluster.myself); - if (saveconf) { - if (clusterSaveConfig("redis-cluster.conf") == -1) { - redisLog(REDIS_WARNING,"Fatal: can't update cluster config file."); - exit(1); - } - } + if (saveconf) clusterSaveConfigOrDie(); /* We need a listening TCP port for our cluster messaging needs */ server.cfd = anetTcpServer(server.neterr, server.port+REDIS_CLUSTER_PORT_INCR, server.bindaddr); @@ -334,6 +427,7 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) { /* Broadcast the failing node name to everybody */ clusterSendFail(node->name); clusterUpdateState(); + clusterSaveConfigOrDie(); } } else { /* If it's not in NOADDR state and we don't have it, we @@ -412,6 +506,7 @@ int clusterProcessPacket(clusterLink *link) { sender = clusterLookupNode(hdr->sender); if (type == CLUSTERMSG_TYPE_PING || type == CLUSTERMSG_TYPE_MEET) { + int update_config = 0; redisLog(REDIS_DEBUG,"Ping packet received: %p", link->node); /* Add this node if it is new for us and the msg type is MEET. @@ -425,6 +520,7 @@ int clusterProcessPacket(clusterLink *link) { nodeIp2String(node->ip,link); node->port = ntohs(hdr->port); clusterAddNode(node); + update_config = 1; } /* Get info from the gossip section */ @@ -432,8 +528,12 @@ int clusterProcessPacket(clusterLink *link) { /* Anyway reply with a PONG */ clusterSendPing(link,CLUSTERMSG_TYPE_PONG); + + /* Update config if needed */ + if (update_config) clusterSaveConfigOrDie(); } else if (type == CLUSTERMSG_TYPE_PONG) { - int update = 0; + int update_state = 0; + int update_config = 0; redisLog(REDIS_DEBUG,"Pong packet received: %p", link->node); if (link->node) { @@ -454,6 +554,7 @@ int clusterProcessPacket(clusterLink *link) { redisLog(REDIS_DEBUG,"Handshake with node %.40s completed.", link->node->name); link->node->flags &= ~REDIS_NODE_HANDSHAKE; + update_config = 1; } else if (memcmp(link->node->name,hdr->sender, REDIS_CLUSTER_NAMELEN) != 0) { @@ -463,6 +564,7 @@ int clusterProcessPacket(clusterLink *link) { redisLog(REDIS_DEBUG,"PONG contains mismatching sender ID"); link->node->flags |= REDIS_NODE_NOADDR; freeClusterLink(link); + update_config = 1; /* FIXME: remove this node if we already have it. * * If we already have it but the IP is different, use @@ -508,7 +610,7 @@ int clusterProcessPacket(clusterLink *link) { server.cluster.slots[j]->flags & REDIS_NODE_FAIL) { server.cluster.slots[j] = sender; - update = 1; + update_state = update_config = 1; } } } @@ -519,7 +621,8 @@ int clusterProcessPacket(clusterLink *link) { clusterProcessGossipSection(hdr,link); /* Update the cluster state if needed */ - if (update) clusterUpdateState(); + if (update_state) clusterUpdateState(); + if (update_config) clusterSaveConfigOrDie(); } else if (type == CLUSTERMSG_TYPE_FAIL && sender) { clusterNode *failing; @@ -531,6 +634,7 @@ int clusterProcessPacket(clusterLink *link) { failing->flags |= REDIS_NODE_FAIL; failing->flags &= ~REDIS_NODE_PFAIL; clusterUpdateState(); + clusterSaveConfigOrDie(); } } else { redisLog(REDIS_NOTICE,"Received unknown packet type: %d", type); @@ -781,7 +885,7 @@ void clusterCron(void) { * normal PING packets. */ node->flags &= ~REDIS_NODE_MEET; - redisLog(REDIS_NOTICE,"Connecting with Node %.40s at %s:%d\n", node->name, node->ip, node->port+REDIS_CLUSTER_PORT_INCR); + redisLog(REDIS_NOTICE,"Connecting with Node %.40s at %s:%d", node->name, node->ip, node->port+REDIS_CLUSTER_PORT_INCR); } } dictReleaseIterator(di); @@ -910,6 +1014,7 @@ sds clusterGenNodesDescription(void) { sds ci = sdsempty(); dictIterator *di; dictEntry *de; + int j, start; di = dictGetIterator(server.cluster.nodes); while((de = dictNext(di)) != NULL) { @@ -939,10 +1044,31 @@ sds clusterGenNodesDescription(void) { ci = sdscatprintf(ci,"- "); /* Latency from the POV of this node, link status */ - ci = sdscatprintf(ci,"%ld %ld %s\n", + ci = sdscatprintf(ci,"%ld %ld %s", (long) node->ping_sent, (long) node->pong_received, node->link ? "connected" : "disconnected"); + + /* Slots served by this instance */ + start = -1; + for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) { + int bit; + + if ((bit = clusterNodeGetSlotBit(node,j)) != 0) { + if (start == -1) start = j; + } + if (start != -1 && (!bit || j == REDIS_CLUSTER_SLOTS-1)) { + if (j == REDIS_CLUSTER_SLOTS-1) j++; + + if (start == j-1) { + ci = sdscatprintf(ci," %d",start); + } else { + ci = sdscatprintf(ci," %d-%d",start,j-1); + } + start = -1; + } + } + ci = sdscatlen(ci,"\n",1); } dictReleaseIterator(di); return ci; @@ -1022,6 +1148,7 @@ void clusterCommand(redisClient *c) { } zfree(slots); clusterUpdateState(); + clusterSaveConfigOrDie(); addReply(c,shared.ok); } else if (!strcasecmp(c->argv[1]->ptr,"info") && c->argc == 2) { char *statestr[] = {"ok","fail","needhelp"}; @@ -1115,8 +1242,10 @@ void restoreCommand(redisClient *c) { /* Finally create the object from the serialized dump and * store it at the specified key. */ - o = rdbLoadObject(data[0],fp); - if (o == NULL) { + if ((data[0] > 4 && data[0] < 9) || + data[0] > 11 || + (o = rdbLoadObject(data[0],fp)) == NULL) + { addReplyError(c,"Bad data format."); fclose(fp); return; @@ -1245,6 +1374,7 @@ file_wr_err: strerror(errno)); fclose(fp); close(fd); + return; file_rd_err: redisLog(REDIS_WARNING,"Can't read from tmp file for MIGRATE: %s", @@ -1253,6 +1383,7 @@ file_rd_err: strerror(errno)); fclose(fp); close(fd); + return; socket_wr_err: redisLog(REDIS_NOTICE,"Can't write to target node for MIGRATE: %s", @@ -1261,6 +1392,7 @@ socket_wr_err: strerror(errno)); fclose(fp); close(fd); + return; socket_rd_err: redisLog(REDIS_NOTICE,"Can't read from target node for MIGRATE: %s", @@ -1269,6 +1401,81 @@ socket_rd_err: strerror(errno)); fclose(fp); close(fd); + return; +} + +/* DUMP keyname + * DUMP is actually not used by Redis Cluster but it is the obvious + * complement of RESTORE and can be useful for different applications. */ +void dumpCommand(redisClient *c) { + char buf[64]; + FILE *fp; + robj *o, *dumpobj; + sds dump = NULL; + off_t payload_len; + unsigned int type; + + /* Check if the key is here. */ + if ((o = lookupKeyRead(c->db,c->argv[1])) == NULL) { + addReply(c,shared.nullbulk); + return; + } + + /* Create temp file */ + snprintf(buf,sizeof(buf),"redis-dump-%d.tmp",getpid()); + fp = fopen(buf,"w+"); + if (!fp) { + redisLog(REDIS_WARNING,"Can't open tmp file for MIGRATE: %s", + strerror(errno)); + addReplyErrorFormat(c,"DUMP failed, tmp file creation error: %s.", + strerror(errno)); + return; + } + unlink(buf); + + /* Dump the serailized object and read it back in memory. + * We prefix it with a one byte containing the type ID. + * This is the serialization format understood by RESTORE. */ + if (rdbSaveObject(fp,o) == -1) goto file_wr_err; + payload_len = ftello(fp); + if (fseeko(fp,0,SEEK_SET) == -1) goto file_rd_err; + dump = sdsnewlen(NULL,payload_len+1); + if (payload_len && fread(dump+1,payload_len,1,fp) != 1) goto file_rd_err; + fclose(fp); + type = o->type; + if (type == REDIS_LIST && o->encoding == REDIS_ENCODING_ZIPLIST) + type = REDIS_LIST_ZIPLIST; + else if (type == REDIS_HASH && o->encoding == REDIS_ENCODING_ZIPMAP) + type = REDIS_HASH_ZIPMAP; + else if (type == REDIS_SET && o->encoding == REDIS_ENCODING_INTSET) + type = REDIS_SET_INTSET; + else + type = o->type; + dump[0] = type; + + /* Transfer to the client */ + dumpobj = createObject(REDIS_STRING,dump); + addReplyBulk(c,dumpobj); + decrRefCount(dumpobj); + return; + +file_wr_err: + redisLog(REDIS_WARNING,"Can't write on tmp file for DUMP: %s", + strerror(errno)); + addReplyErrorFormat(c,"DUMP failed, tmp file write error: %s.", + strerror(errno)); + sdsfree(dump); + fclose(fp); + return; + +file_rd_err: + redisLog(REDIS_WARNING,"Can't read from tmp file for DUMP: %s", + strerror(errno)); + addReplyErrorFormat(c,"DUMP failed, tmp file read error: %s.", + strerror(errno)); + sdsfree(dump); + fclose(fp); + return; } /* -----------------------------------------------------------------------------