#include "adlist.h" /* Linked lists */
#include "zmalloc.h" /* total memory usage aware version of malloc/free */
#include "anet.h" /* Networking the easy way */
-#include "zipmap.h" /* Compact string -> string data structure */
#include "ziplist.h" /* Compact list data structure */
#include "intset.h" /* Compact integer set structure */
#include "version.h" /* Version macro */
#define REDIS_CONFIGLINE_MAX 1024
#define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* lookup 10 expires per loop */
#define REDIS_MAX_WRITE_PER_EVENT (1024*64)
+#define REDIS_SHARED_SELECT_CMDS 10
#define REDIS_SHARED_INTEGERS 10000
#define REDIS_SHARED_BULKHDR_LEN 32
#define REDIS_MAX_LOGMSG_LEN 1024 /* Default maximum length of syslog messages */
#define REDIS_AOF_REWRITE_MIN_SIZE (1024*1024)
#define REDIS_AOF_REWRITE_ITEMS_PER_CMD 64
#define REDIS_SLOWLOG_LOG_SLOWER_THAN 10000
-#define REDIS_SLOWLOG_MAX_LEN 64
+#define REDIS_SLOWLOG_MAX_LEN 128
#define REDIS_MAX_CLIENTS 10000
#define REDIS_REPL_TIMEOUT 60
#define REDIS_REPL_PING_SLAVE_PERIOD 10
+#define REDIS_RUN_ID_SIZE 40
+#define REDIS_OPS_SEC_SAMPLES 16
+
/* Protocol and I/O related defines */
#define REDIS_MAX_QUERYBUF_LEN (1024*1024*1024) /* 1GB max query buffer. */
#define REDIS_IOBUF_LEN (1024*16) /* Generic I/O buffer size */
#define REDIS_SET 2
#define REDIS_ZSET 3
#define REDIS_HASH 4
-#define REDIS_VMPOINTER 8
/* Objects encoding. Some kind of objects like Strings and Hashes can be
* internally represented in multiple ways. The 'encoding' field of the object
#define AOF_FSYNC_EVERYSEC 2
/* Zip structure related defaults */
-#define REDIS_HASH_MAX_ZIPMAP_ENTRIES 512
-#define REDIS_HASH_MAX_ZIPMAP_VALUE 64
+#define REDIS_HASH_MAX_ZIPLIST_ENTRIES 512
+#define REDIS_HASH_MAX_ZIPLIST_VALUE 64
#define REDIS_LIST_MAX_ZIPLIST_ENTRIES 512
#define REDIS_LIST_MAX_ZIPLIST_VALUE 64
#define REDIS_SET_MAX_INTSET_ENTRIES 512
#define REDIS_CALL_PROPAGATE 4
#define REDIS_CALL_FULL (REDIS_CALL_SLOWLOG | REDIS_CALL_STATS | REDIS_CALL_PROPAGATE)
+/* Command propagation flags, see propagate() function */
+#define REDIS_PROPAGATE_NONE 0
+#define REDIS_PROPAGATE_AOF 1
+#define REDIS_PROPAGATE_REPL 2
+
/* We can print the stacktrace, so our assert is defined this way: */
#define redisAssertWithInfo(_c,_o,_e) ((_e)?(void)0 : (_redisAssertWithInfo(_c,_o,#_e,__FILE__,__LINE__),_exit(1)))
#define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
redisDb *db;
int dictid;
sds querybuf;
+ size_t querybuf_peak; /* Recent (100ms or more) peak of querybuf size */
int argc;
robj **argv;
struct redisCommand *cmd, *lastcmd;
list *reply;
unsigned long reply_bytes; /* Tot bytes of objects in reply list */
int sentlen;
+ time_t ctime; /* Client creation time */
time_t lastinteraction; /* time of the last interaction, used for timeout */
time_t obuf_soft_limit_reached_time;
int flags; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *cnegone, *pong, *space,
*colon, *nullbulk, *nullmultibulk, *queued,
*emptymultibulk, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr,
- *outofrangeerr, *noscripterr, *loadingerr, *slowscripterr, *plus,
- *select0, *select1, *select2, *select3, *select4,
- *select5, *select6, *select7, *select8, *select9,
- *messagebulk, *pmessagebulk, *subscribebulk, *unsubscribebulk,
- *psubscribebulk, *punsubscribebulk, *del,
+ *outofrangeerr, *noscripterr, *loadingerr, *slowscripterr, *bgsaveerr,
+ *masterdownerr, *roslaveerr,
+ *oomerr, *plus, *messagebulk, *pmessagebulk, *subscribebulk,
+ *unsubscribebulk, *psubscribebulk, *punsubscribebulk, *del, *rpop, *lpop,
+ *select[REDIS_SHARED_SELECT_CMDS],
*integers[REDIS_SHARED_INTEGERS],
*mbulkhdr[REDIS_SHARED_BULKHDR_LEN], /* "*<value>\r\n" */
*bulkhdr[REDIS_SHARED_BULKHDR_LEN]; /* "$<value>\r\n" */
time_t soft_limit_seconds;
} clientBufferLimitsConfig;
-/*-----------------------------------------------------------------------------
- * Redis cluster data structures
- *----------------------------------------------------------------------------*/
-
-#define REDIS_CLUSTER_SLOTS 4096
-#define REDIS_CLUSTER_OK 0 /* Everything looks ok */
-#define REDIS_CLUSTER_FAIL 1 /* The cluster can't work */
-#define REDIS_CLUSTER_NEEDHELP 2 /* The cluster works, but needs some help */
-#define REDIS_CLUSTER_NAMELEN 40 /* sha1 hex length */
-#define REDIS_CLUSTER_PORT_INCR 10000 /* Cluster port = baseport + PORT_INCR */
-
-struct clusterNode;
-
-/* clusterLink encapsulates everything needed to talk with a remote node. */
-typedef struct clusterLink {
- int fd; /* TCP socket file descriptor */
- sds sndbuf; /* Packet send buffer */
- sds rcvbuf; /* Packet reception buffer */
- struct clusterNode *node; /* Node related to this link if any, or NULL */
-} clusterLink;
-
-/* Node flags */
-#define REDIS_NODE_MASTER 1 /* The node is a master */
-#define REDIS_NODE_SLAVE 2 /* The node is a slave */
-#define REDIS_NODE_PFAIL 4 /* Failure? Need acknowledge */
-#define REDIS_NODE_FAIL 8 /* The node is believed to be malfunctioning */
-#define REDIS_NODE_MYSELF 16 /* This node is myself */
-#define REDIS_NODE_HANDSHAKE 32 /* We have still to exchange the first ping */
-#define REDIS_NODE_NOADDR 64 /* We don't know the address of this node */
-#define REDIS_NODE_MEET 128 /* Send a MEET message to this node */
-#define REDIS_NODE_NULL_NAME "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"
-
-struct clusterNode {
- char name[REDIS_CLUSTER_NAMELEN]; /* Node name, hex string, sha1-size */
- int flags; /* REDIS_NODE_... */
- unsigned char slots[REDIS_CLUSTER_SLOTS/8]; /* slots handled by this node */
- int numslaves; /* Number of slave nodes, if this is a master */
- struct clusterNode **slaves; /* pointers to slave nodes */
- struct clusterNode *slaveof; /* pointer to the master node */
- time_t ping_sent; /* Unix time we sent latest ping */
- time_t pong_received; /* Unix time we received the pong */
- char *configdigest; /* Configuration digest of this node */
- time_t configdigest_ts; /* Configuration digest timestamp */
- char ip[16]; /* Latest known IP address of this node */
- int port; /* Latest known port of this node */
- clusterLink *link; /* TCP/IP link with this node */
-};
-typedef struct clusterNode clusterNode;
-
-typedef struct {
- char *configfile;
- clusterNode *myself; /* This node */
- int state; /* REDIS_CLUSTER_OK, REDIS_CLUSTER_FAIL, ... */
- int node_timeout;
- dict *nodes; /* Hash table of name -> clusterNode structures */
- clusterNode *migrating_slots_to[REDIS_CLUSTER_SLOTS];
- clusterNode *importing_slots_from[REDIS_CLUSTER_SLOTS];
- clusterNode *slots[REDIS_CLUSTER_SLOTS];
- zskiplist *slots_to_keys;
-} clusterState;
-
-/* Redis cluster messages header */
-
-/* Note that the PING, PONG and MEET messages are actually the same exact
- * kind of packet. PONG is the reply to ping, in the extact format as a PING,
- * while MEET is a special PING that forces the receiver to add the sender
- * as a node (if it is not already in the list). */
-#define CLUSTERMSG_TYPE_PING 0 /* Ping */
-#define CLUSTERMSG_TYPE_PONG 1 /* Pong (reply to Ping) */
-#define CLUSTERMSG_TYPE_MEET 2 /* Meet "let's join" message */
-#define CLUSTERMSG_TYPE_FAIL 3 /* Mark node xxx as failing */
-#define CLUSTERMSG_TYPE_PUBLISH 4 /* Pub/Sub Publish propatagion */
-
-/* Initially we don't know our "name", but we'll find it once we connect
- * to the first node, using the getsockname() function. Then we'll use this
- * address for all the next messages. */
-typedef struct {
- char nodename[REDIS_CLUSTER_NAMELEN];
- uint32_t ping_sent;
- uint32_t pong_received;
- char ip[16]; /* IP address last time it was seen */
- uint16_t port; /* port last time it was seen */
- uint16_t flags;
- uint32_t notused; /* for 64 bit alignment */
-} clusterMsgDataGossip;
-
-typedef struct {
- char nodename[REDIS_CLUSTER_NAMELEN];
-} clusterMsgDataFail;
-
-typedef struct {
- uint32_t channel_len;
- uint32_t message_len;
- unsigned char bulk_data[8]; /* defined as 8 just for alignment concerns. */
-} clusterMsgDataPublish;
-
-union clusterMsgData {
- /* PING, MEET and PONG */
- struct {
- /* Array of N clusterMsgDataGossip structures */
- clusterMsgDataGossip gossip[1];
- } ping;
-
- /* FAIL */
- struct {
- clusterMsgDataFail about;
- } fail;
-
- /* PUBLISH */
- struct {
- clusterMsgDataPublish msg;
- } publish;
-};
+/* The redisOp structure defines a Redis Operation, that is an instance of
+ * a command with an argument vector, database ID, propagation target
+ * (REDIS_PROPAGATE_*), and command pointer.
+ *
+ * Currently only used to additionally propagate more commands to AOF/Replication
+ * after the propagation of the executed command. */
+typedef struct redisOp {
+ robj **argv; /* Argument vector of the command instance */
+ int argc, dbid, target; /* Argument count, database ID, REDIS_PROPAGATE_* target */
+ struct redisCommand *cmd; /* Command pointer for this operation */
+} redisOp;
-typedef struct {
- uint32_t totlen; /* Total length of this message */
- uint16_t type; /* Message type */
- uint16_t count; /* Only used for some kind of messages. */
- char sender[REDIS_CLUSTER_NAMELEN]; /* Name of the sender node */
- unsigned char myslots[REDIS_CLUSTER_SLOTS/8];
- char slaveof[REDIS_CLUSTER_NAMELEN];
- char configdigest[32];
- uint16_t port; /* Sender TCP base port */
- unsigned char state; /* Cluster state from the POV of the sender */
- unsigned char notused[5]; /* Reserved for future use. For alignment. */
- union clusterMsgData data;
-} clusterMsg;
+/* Defines an array of Redis operations. There is an API to add to this
+ * structure in an easy way.
+ *
+ * redisOpArrayInit();
+ * redisOpArrayAppend();
+ * redisOpArrayFree();
+ */
+typedef struct redisOpArray {
+ redisOp *ops; /* The Redis operations held by this array */
+ int numops; /* Presumably the number of entries in 'ops' -- TODO confirm against redisOpArrayAppend() */
+} redisOpArray;
/*-----------------------------------------------------------------------------
* Global server state
char *requirepass; /* Pass for AUTH command, or NULL */
char *pidfile; /* PID file path */
int arch_bits; /* 32 or 64 depending on sizeof(long) */
+ int cronloops; /* Number of times the cron function run */
+ char runid[REDIS_RUN_ID_SIZE+1]; /* ID always different at every exec. */
/* Networking */
int port; /* TCP listening port */
char *bindaddr; /* Bind address or NULL */
mode_t unixsocketperm; /* UNIX socket permission */
int ipfd; /* TCP socket file descriptor */
int sofd; /* Unix socket file descriptor */
- int cfd; /* Cluster bus lisetning socket */
list *clients; /* List of active clients */
list *clients_to_close; /* Clients to close asynchronously */
list *slaves, *monitors; /* List of slaves and MONITORs */
off_t loading_loaded_bytes;
time_t loading_start_time;
/* Fast pointers to often looked up command */
- struct redisCommand *delCommand, *multiCommand;
- int cronloops; /* Number of times the cron function run */
- time_t lastsave; /* Unix time of last save succeeede */
+ struct redisCommand *delCommand, *multiCommand, *lpushCommand;
/* Fields used only for stats */
time_t stat_starttime; /* Server start time */
long long stat_numcommands; /* Number of processed commands */
long long slowlog_entry_id; /* SLOWLOG current entry ID */
long long slowlog_log_slower_than; /* SLOWLOG time limit (to get logged) */
unsigned long slowlog_max_len; /* SLOWLOG max number of items logged */
+ /* The following two are used to track instantaneous "load" in terms
+ * of operations per second. */
+ long long ops_sec_last_sample_time; /* Timestamp of last sample (in ms) */
+ long long ops_sec_last_sample_ops; /* numcommands in last sample */
+ long long ops_sec_samples[REDIS_OPS_SEC_SAMPLES];
+ int ops_sec_idx;
/* Configuration */
int verbosity; /* Loglevel in redis.conf */
int maxidletime; /* Client timeout in seconds */
int aof_selected_db; /* Currently selected DB in AOF */
time_t aof_flush_postponed_start; /* UNIX time of postponed AOF flush */
time_t aof_last_fsync; /* UNIX time of last fsync() */
+ unsigned long aof_delayed_fsync; /* delayed AOF fsync() counter */
/* RDB persistence */
long long dirty; /* Changes to DB from the last save */
long long dirty_before_bgsave; /* Used to restore dirty on failed BGSAVE */
int saveparamslen; /* Number of saving points */
char *rdb_filename; /* Name of RDB file */
int rdb_compression; /* Use compression in RDB? */
+ int rdb_checksum; /* Use RDB checksum? */
+ time_t lastsave; /* Unix time of the last successful save */
+ int lastbgsave_status; /* REDIS_OK or REDIS_ERR */
+ int stop_writes_on_bgsave_err; /* Don't allow writes if can't BGSAVE */
+ /* Propagation of commands in AOF / replication */
+ redisOpArray also_propagate; /* Additional command to propagate. */
/* Logging */
char *logfile; /* Path of log file */
int syslog_enabled; /* Is syslog enabled? */
char *repl_transfer_tmpfile; /* Slave-> master SYNC temp file name */
time_t repl_transfer_lastio; /* Unix time of the latest read, for timeout */
int repl_serve_stale_data; /* Serve stale data when link is down? */
+ int repl_slave_ro; /* Slave is read only? */
time_t repl_down_since; /* Unix time at which link with master went down */
/* Limits */
unsigned int maxclients; /* Max number of simultaneous clients */
int sort_alpha;
int sort_bypattern;
/* Zip structure config, see redis.conf for more information */
- size_t hash_max_zipmap_entries;
- size_t hash_max_zipmap_value;
+ size_t hash_max_ziplist_entries;
+ size_t hash_max_ziplist_value;
size_t list_max_ziplist_entries;
size_t list_max_ziplist_value;
size_t set_max_intset_entries;
/* Pubsub */
dict *pubsub_channels; /* Map channels to list of subscribed clients */
list *pubsub_patterns; /* A list of pubsub_patterns */
- /* Cluster */
- int cluster_enabled; /* Is cluster enabled? */
- clusterState cluster; /* State of the cluster */
/* Scripting */
lua_State *lua; /* The Lua interpreter. We use just one for all clients */
redisClient *lua_client; /* The "fake client" to query Redis from Lua */
char *assert_file;
int assert_line;
int bug_report_start; /* True if bug report header was already logged. */
+ int watchdog_period; /* Software watchdog period in ms. 0 = off */
};
typedef struct pubsubPattern {
int arity;
char *sflags; /* Flags as string represenation, one char per flag. */
int flags; /* The actual flags, obtained from the 'sflags' field. */
- /* Use a function to determine keys arguments in a command line.
- * Used for Redis Cluster redirect. */
+ /* Use a function to determine keys arguments in a command line. */
redisGetKeysProc *getkeys_proc;
/* What keys should be loaded in background when calling this command? */
int firstkey; /* The first argument that's a key (0 = no keys) */
* not both are required, store pointers in the iterator to avoid
* unnecessary memory allocation for fields/values. */
typedef struct {
+ robj *subject;
int encoding;
- unsigned char *zi;
- unsigned char *zk, *zv;
- unsigned int zklen, zvlen;
+
+ unsigned char *fptr, *vptr;
dictIterator *di;
dictEntry *de;
extern struct sharedObjectsStruct shared;
extern dictType setDictType;
extern dictType zsetDictType;
-extern dictType clusterNodesDictType;
extern dictType dbDictType;
extern double R_Zero, R_PosInf, R_NegInf, R_Nan;
-dictType hashDictType;
+extern dictType hashDictType;
/*-----------------------------------------------------------------------------
* Functions prototypes
/* Utils */
long long ustime(void);
long long mstime(void);
+void getRandomHexChars(char *p, unsigned int len);
+uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l);
+void exitFromChild(int retcode);
/* networking.c -- Networking and Client related operations */
redisClient *createClient(int fd);
void asyncCloseClientOnOutputBufferLimitReached(redisClient *c);
int getClientLimitClassByName(char *name);
char *getClientLimitClassName(int class);
+void flushSlavesOutputBuffers(void);
+void disconnectSlaves(void);
#ifdef __GNUC__
void addReplyErrorFormat(redisClient *c, const char *fmt, ...)
void queueMultiCommand(redisClient *c);
void touchWatchedKey(redisDb *db, robj *key);
void touchWatchedKeysOnFlush(int dbid);
+void discardTransaction(redisClient *c);
/* Redis object implementation */
void decrRefCount(void *o);
unsigned long estimateObjectIdleTime(robj *o);
/* Synchronous I/O with timeout */
-int syncWrite(int fd, char *ptr, ssize_t size, int timeout);
-int syncRead(int fd, char *ptr, ssize_t size, int timeout);
-int syncReadLine(int fd, char *ptr, ssize_t size, int timeout);
+ssize_t syncWrite(int fd, char *ptr, ssize_t size, long long timeout);
+ssize_t syncRead(int fd, char *ptr, ssize_t size, long long timeout);
+ssize_t syncReadLine(int fd, char *ptr, ssize_t size, long long timeout);
/* Replication */
void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc);
-void replicationFeedMonitors(list *monitors, int dictid, robj **argv, int argc);
+void replicationFeedMonitors(redisClient *c, list *monitors, int dictid, robj **argv, int argc);
void updateSlavesWaitingBgsave(int bgsaveerr);
void replicationCron(void);
struct redisCommand *lookupCommand(sds name);
struct redisCommand *lookupCommandByCString(char *s);
void call(redisClient *c, int flags);
+void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, int flags);
+void alsoPropagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, int target);
int prepareForShutdown();
void redisLog(int level, const char *fmt, ...);
void redisLogRaw(int level, const char *msg);
+void redisLogFromHandler(int level, const char *msg);
void usage();
void updateDictResizePolicy(void);
int htNeedsResize(dict *dict);
void setTypeConvert(robj *subject, int enc);
/* Hash data type */
-void convertToRealHash(robj *o);
+void hashTypeConvert(robj *o, int enc);
void hashTypeTryConversion(robj *subject, robj **argv, int start, int end);
void hashTypeTryObjectEncoding(robj *subject, robj **o1, robj **o2);
-int hashTypeGet(robj *o, robj *key, robj **objval, unsigned char **v, unsigned int *vlen);
robj *hashTypeGetObject(robj *o, robj *key);
int hashTypeExists(robj *o, robj *key);
int hashTypeSet(robj *o, robj *key, robj *value);
hashTypeIterator *hashTypeInitIterator(robj *subject);
void hashTypeReleaseIterator(hashTypeIterator *hi);
int hashTypeNext(hashTypeIterator *hi);
-int hashTypeCurrent(hashTypeIterator *hi, int what, robj **objval, unsigned char **v, unsigned int *vlen);
+void hashTypeCurrentFromZiplist(hashTypeIterator *hi, int what,
+ unsigned char **vstr,
+ unsigned int *vlen,
+ long long *vll);
+void hashTypeCurrentFromHashTable(hashTypeIterator *hi, int what, robj **dst);
robj *hashTypeCurrentObject(hashTypeIterator *hi, int what);
robj *hashTypeLookupWriteOrCreate(redisClient *c, robj *key);
int *renameGetKeys(struct redisCommand *cmd,robj **argv, int argc, int *numkeys, int flags);
int *zunionInterGetKeys(struct redisCommand *cmd,robj **argv, int argc, int *numkeys, int flags);
-/* Cluster */
-void clusterInit(void);
-unsigned short crc16(const char *buf, int len);
-unsigned int keyHashSlot(char *key, int keylen);
-clusterNode *createClusterNode(char *nodename, int flags);
-int clusterAddNode(clusterNode *node);
-void clusterCron(void);
-clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, int *ask);
-void clusterPropagatePublish(robj *channel, robj *message);
-
/* Scripting */
void scriptingInit(void);
void publishCommand(redisClient *c);
void watchCommand(redisClient *c);
void unwatchCommand(redisClient *c);
-void clusterCommand(redisClient *c);
void restoreCommand(redisClient *c);
void migrateCommand(redisClient *c);
-void askingCommand(redisClient *c);
void dumpCommand(redisClient *c);
void objectCommand(redisClient *c);
void clientCommand(redisClient *c);
void evalCommand(redisClient *c);
void evalShaCommand(redisClient *c);
void scriptCommand(redisClient *c);
+void timeCommand(redisClient *c);
#if defined(__GNUC__)
void *calloc(size_t count, size_t size) __attribute__ ((deprecated));
void redisLogObjectDebugInfo(robj *o);
void sigsegvHandler(int sig, siginfo_t *info, void *secret);
sds genRedisInfoString(char *section);
+void enableWatchdog(int period);
+void disableWatchdog(void);
+void watchdogScheduleSignal(int period);
#endif