#include <errno.h>
#include <inttypes.h>
#include <pthread.h>
-
-#include "ae.h" /* Event driven programming library */
-#include "sds.h" /* Dynamic safe strings */
-#include "dict.h" /* Hash tables */
-#include "adlist.h" /* Linked lists */
+#include <syslog.h>
+#include <netinet/in.h>
+#include <lua.h>
+
+#include "ae.h" /* Event driven programming library */
+#include "sds.h" /* Dynamic safe strings */
+#include "dict.h" /* Hash tables */
+#include "adlist.h" /* Linked lists */
#include "zmalloc.h" /* total memory usage aware version of malloc/free */
-#include "anet.h" /* Networking the easy way */
-#include "zipmap.h" /* Compact string -> string data structure */
+#include "anet.h" /* Networking the easy way */
+#include "zipmap.h" /* Compact string -> string data structure */
#include "ziplist.h" /* Compact list data structure */
-#include "intset.h" /* Compact integer set structure */
-#include "version.h"
+#include "intset.h" /* Compact integer set structure */
+#include "version.h" /* Version macro */
+#include "util.h" /* Misc functions useful in many places */
/* Error codes */
#define REDIS_OK 0
/* Static server configuration */
#define REDIS_SERVERPORT 6379 /* TCP port */
-#define REDIS_MAXIDLETIME (60*5) /* default client timeout */
-#define REDIS_IOBUF_LEN 1024
+#define REDIS_MAXIDLETIME 0 /* default client timeout: infinite */
+#define REDIS_MAX_QUERYBUF_LEN (1024*1024*1024) /* 1GB max query buffer. */
+#define REDIS_IOBUF_LEN (1024*16)
#define REDIS_LOADBUF_LEN 1024
-#define REDIS_STATIC_ARGS 8
#define REDIS_DEFAULT_DBNUM 16
#define REDIS_CONFIGLINE_MAX 1024
#define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
#define REDIS_MAX_WRITE_PER_EVENT (1024*64)
#define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
#define REDIS_SHARED_INTEGERS 10000
-#define REDIS_REPLY_CHUNK_BYTES (5*1500) /* 5 TCP packets with default MTU */
-
-/* If more then REDIS_WRITEV_THRESHOLD write packets are pending use writev */
-#define REDIS_WRITEV_THRESHOLD 3
-/* Max number of iovecs used for each writev call */
-#define REDIS_WRITEV_IOVEC_COUNT 256
+#define REDIS_REPLY_CHUNK_BYTES (16*1024) /* 16k output buffer */
+#define REDIS_MAX_LOGMSG_LEN 1024 /* Default maximum length of syslog messages */
+#define REDIS_AUTO_AOFREWRITE_PERC 100
+#define REDIS_AUTO_AOFREWRITE_MIN_SIZE (1024*1024)
+#define REDIS_SLOWLOG_LOG_SLOWER_THAN 10000
+#define REDIS_SLOWLOG_MAX_LEN 64
+#define REDIS_MAX_CLIENTS 10000
+
+#define REDIS_REPL_TIMEOUT 60
+#define REDIS_REPL_PING_SLAVE_PERIOD 10
+#define REDIS_MBULK_BIG_ARG (1024*32)
/* Hash table parameters */
#define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
-/* Command flags:
- * REDIS_CMD_DENYOOM:
- * Commands marked with this flag will return an error when 'maxmemory' is
- * set and the server is using more than 'maxmemory' bytes of memory.
- * In short: commands with this flag are denied on low memory conditions.
- * REDIS_CMD_FORCE_REPLICATION:
- * Force replication even if dirty is 0. */
-#define REDIS_CMD_DENYOOM 4
-#define REDIS_CMD_FORCE_REPLICATION 8
+/* Command flags. Please check the command table defined in the redis.c file
+ * for more information about the meaning of every flag. */
+#define REDIS_CMD_WRITE 1 /* "w" flag */
+#define REDIS_CMD_READONLY 2 /* "r" flag */
+#define REDIS_CMD_DENYOOM 4 /* "m" flag */
+#define REDIS_CMD_FORCE_REPLICATION 8 /* "f" flag */
+#define REDIS_CMD_ADMIN 16 /* "a" flag */
+#define REDIS_CMD_PUBSUB 32 /* "p" flag */
+#define REDIS_CMD_NOSCRIPT 64 /* "s" flag */
+#define REDIS_CMD_RANDOM 128 /* "R" flag */
/* Object types */
#define REDIS_STRING 0
#define REDIS_ENCODING_LINKEDLIST 4 /* Encoded as regular linked list */
#define REDIS_ENCODING_ZIPLIST 5 /* Encoded as ziplist */
#define REDIS_ENCODING_INTSET 6 /* Encoded as intset */
-
-/* Object types only used for dumping to disk */
-#define REDIS_EXPIRETIME 253
-#define REDIS_SELECTDB 254
-#define REDIS_EOF 255
+#define REDIS_ENCODING_SKIPLIST 7 /* Encoded as skiplist */
/* Defines related to the dump file format. To store 32 bits lengths for short
* keys requires a lot of space, so we check the most significant 2 bits of
#define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
#define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
-/* Virtual memory object->where field. */
-#define REDIS_VM_MEMORY 0 /* The object is on memory */
-#define REDIS_VM_SWAPPED 1 /* The object is on disk */
-#define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
-#define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
-
-/* Virtual memory static configuration stuff.
- * Check vmFindContiguousPages() to know more about this magic numbers. */
-#define REDIS_VM_MAX_NEAR_PAGES 65536
-#define REDIS_VM_MAX_RANDOM_JUMP 4096
-#define REDIS_VM_MAX_THREADS 32
-#define REDIS_THREAD_STACK_SIZE (1024*1024*4)
-/* The following is the *percentage* of completed I/O jobs to process when the
- * handelr is called. While Virtual Memory I/O operations are performed by
- * threads, this operations must be processed by the main thread when completed
- * in order to take effect. */
-#define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
-
/* Client flags */
#define REDIS_SLAVE 1 /* This client is a slave server */
#define REDIS_MASTER 2 /* This client is a master server */
#define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
#define REDIS_MULTI 8 /* This client is in a MULTI context */
#define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
-#define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
#define REDIS_DIRTY_CAS 64 /* Watched keys modified. EXEC will fail. */
#define REDIS_CLOSE_AFTER_REPLY 128 /* Close after writing entire reply. */
+#define REDIS_UNBLOCKED 256 /* This client was unblocked and is stored in
+ server.unblocked_clients */
+#define REDIS_LUA_CLIENT 512 /* This is a non connected client used by Lua */
+#define REDIS_ASKING 1024 /* Client issued the ASKING command */
/* Client request types */
#define REDIS_REQ_INLINE 1
#define REDIS_REQ_MULTIBULK 2
/* Slave replication state - slave side */
-#define REDIS_REPL_NONE 0 /* No active replication */
-#define REDIS_REPL_CONNECT 1 /* Must connect to master */
-#define REDIS_REPL_TRANSFER 2 /* Receiving .rdb from master */
-#define REDIS_REPL_CONNECTED 3 /* Connected to master */
+#define REDIS_REPL_NONE 0 /* No active replication */
+#define REDIS_REPL_CONNECT 1 /* Must connect to master */
+#define REDIS_REPL_CONNECTING 2 /* Connecting to master */
+#define REDIS_REPL_TRANSFER 3 /* Receiving .rdb from master */
+#define REDIS_REPL_CONNECTED 4 /* Connected to master */
+
+/* Synchronous read timeout - slave side */
+#define REDIS_REPL_SYNCIO_TIMEOUT 5
/* Slave replication state - from the point of view of master
* Note that in SEND_BULK and ONLINE state the slave receives new updates
#define REDIS_VERBOSE 1
#define REDIS_NOTICE 2
#define REDIS_WARNING 3
+#define REDIS_LOG_RAW (1<<10) /* Modifier to log without timestamp */
/* Anti-warning macro... */
#define REDIS_NOTUSED(V) ((void) V)
#define APPENDFSYNC_EVERYSEC 2
/* Zip structure related defaults */
-#define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
-#define REDIS_HASH_MAX_ZIPMAP_VALUE 512
-#define REDIS_LIST_MAX_ZIPLIST_ENTRIES 1024
-#define REDIS_LIST_MAX_ZIPLIST_VALUE 32
-#define REDIS_SET_MAX_INTSET_ENTRIES 4096
+#define REDIS_HASH_MAX_ZIPMAP_ENTRIES 512
+#define REDIS_HASH_MAX_ZIPMAP_VALUE 64
+#define REDIS_LIST_MAX_ZIPLIST_ENTRIES 512
+#define REDIS_LIST_MAX_ZIPLIST_VALUE 64
+#define REDIS_SET_MAX_INTSET_ENTRIES 512
+#define REDIS_ZSET_MAX_ZIPLIST_ENTRIES 128
+#define REDIS_ZSET_MAX_ZIPLIST_VALUE 64
/* Sets operations codes */
#define REDIS_OP_UNION 0
#define REDIS_MAXMEMORY_ALLKEYS_RANDOM 4
#define REDIS_MAXMEMORY_NO_EVICTION 5
+/* Scripting */
+#define REDIS_LUA_TIME_LIMIT 5000 /* milliseconds */
+
+/* Units */
+#define UNIT_SECONDS 0
+#define UNIT_MILLISECONDS 1
+
+/* SHUTDOWN flags */
+#define REDIS_SHUTDOWN_SAVE 1 /* Force SAVE on SHUTDOWN even if no save
+ points are configured. */
+#define REDIS_SHUTDOWN_NOSAVE 2 /* Don't SAVE on SHUTDOWN. */
+
/* We can print the stacktrace, so our assert is defined this way: */
+#define redisAssertWithInfo(_c,_o,_e) ((_e)?(void)0 : (_redisAssertWithInfo(_c,_o,#_e,__FILE__,__LINE__),_exit(1)))
#define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
#define redisPanic(_e) _redisPanic(#_e,__FILE__,__LINE__),_exit(1)
-void _redisAssert(char *estr, char *file, int line);
-void _redisPanic(char *msg, char *file, int line);
/*-----------------------------------------------------------------------------
* Data types
#define REDIS_LRU_CLOCK_RESOLUTION 10 /* LRU clock resolution in seconds */
typedef struct redisObject {
unsigned type:4;
- unsigned storage:2; /* REDIS_VM_MEMORY or REDIS_VM_SWAPPING */
+ unsigned notused:2; /* Not used */
unsigned encoding:4;
unsigned lru:22; /* lru time (relative to server.lruclock) */
int refcount;
void *ptr;
- /* VM fields are only allocated if VM is active, otherwise the
- * object allocation function will just allocate
- * sizeof(redisObjct) minus sizeof(redisObjectVM), so using
- * Redis without VM active will not have any overhead. */
} robj;
-/* The VM pointer structure - identifies an object in the swap file.
- *
- * This object is stored in place of the value
- * object in the main key->value hash table representing a database.
- * Note that the first fields (type, storage) are the same as the redisObject
- * structure so that vmPointer strucuters can be accessed even when casted
- * as redisObject structures.
- *
- * This is useful as we don't know if a value object is or not on disk, but we
- * are always able to read obj->storage to check this. For vmPointer
- * structures "type" is set to REDIS_VMPOINTER (even if without this field
- * is still possible to check the kind of object from the value of 'storage').*/
-typedef struct vmPointer {
- unsigned type:4;
- unsigned storage:2; /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
- unsigned notused:26;
- unsigned int vtype; /* type of the object stored in the swap file */
- off_t page; /* the page at witch the object is stored on disk */
- off_t usedpages; /* number of pages used on disk */
-} vmpointer;
-
/* Macro used to initalize a Redis object allocated on the stack.
* Note that this macro is taken near the structure definition to make sure
* we'll update it when the structure is changed, to avoid bugs like
_var.type = REDIS_STRING; \
_var.encoding = REDIS_ENCODING_RAW; \
_var.ptr = _ptr; \
- _var.storage = REDIS_VM_MEMORY; \
} while(0);
typedef struct redisDb {
dict *dict; /* The keyspace for this DB */
dict *expires; /* Timeout of keys with a timeout set */
dict *blocking_keys; /* Keys with clients waiting for data (BLPOP) */
- dict *io_keys; /* Keys with clients waiting for VM I/O */
dict *watched_keys; /* WATCHED keys for MULTI/EXEC CAS */
int id;
} redisDb;
sds querybuf;
int argc;
robj **argv;
+ struct redisCommand *cmd, *lastcmd;
int reqtype;
int multibulklen; /* number of multi bulk arguments left to read */
long bulklen; /* length of bulk argument in multi bulk request */
robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *cnegone, *pong, *space,
*colon, *nullbulk, *nullmultibulk, *queued,
*emptymultibulk, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr,
- *outofrangeerr, *loadingerr, *plus,
+ *outofrangeerr, *noscripterr, *loadingerr, *slowscripterr, *plus,
*select0, *select1, *select2, *select3, *select4,
*select5, *select6, *select7, *select8, *select9,
*messagebulk, *pmessagebulk, *subscribebulk, *unsubscribebulk, *mbulk3,
*integers[REDIS_SHARED_INTEGERS];
};
-/* Global server state structure */
+/* ZSETs use a specialized version of Skiplists */
+typedef struct zskiplistNode {
+ robj *obj;
+ double score;
+ struct zskiplistNode *backward;
+ struct zskiplistLevel {
+ struct zskiplistNode *forward;
+ unsigned int span;
+ } level[];
+} zskiplistNode;
+
+typedef struct zskiplist {
+ struct zskiplistNode *header, *tail;
+ unsigned long length;
+ int level;
+} zskiplist;
+
+typedef struct zset {
+ dict *dict;
+ zskiplist *zsl;
+} zset;
+
+/*-----------------------------------------------------------------------------
+ * Redis cluster data structures
+ *----------------------------------------------------------------------------*/
+
+#define REDIS_CLUSTER_SLOTS 4096
+#define REDIS_CLUSTER_OK 0 /* Everything looks ok */
+#define REDIS_CLUSTER_FAIL 1 /* The cluster can't work */
+#define REDIS_CLUSTER_NEEDHELP 2 /* The cluster works, but needs some help */
+#define REDIS_CLUSTER_NAMELEN 40 /* sha1 hex length */
+#define REDIS_CLUSTER_PORT_INCR 10000 /* Cluster port = baseport + PORT_INCR */
+
+struct clusterNode;
+
+/* clusterLink encapsulates everything needed to talk with a remote node. */
+typedef struct clusterLink {
+ int fd; /* TCP socket file descriptor */
+ sds sndbuf; /* Packet send buffer */
+ sds rcvbuf; /* Packet reception buffer */
+ struct clusterNode *node; /* Node related to this link if any, or NULL */
+} clusterLink;
+
+/* Node flags */
+#define REDIS_NODE_MASTER 1 /* The node is a master */
+#define REDIS_NODE_SLAVE 2 /* The node is a slave */
+#define REDIS_NODE_PFAIL 4 /* Failure? Need acknowledge */
+#define REDIS_NODE_FAIL 8 /* The node is believed to be malfunctioning */
+#define REDIS_NODE_MYSELF 16 /* This node is myself */
+#define REDIS_NODE_HANDSHAKE 32 /* We have still to exchange the first ping */
+#define REDIS_NODE_NOADDR 64 /* We don't know the address of this node */
+#define REDIS_NODE_MEET 128 /* Send a MEET message to this node */
+#define REDIS_NODE_NULL_NAME "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"
+
+struct clusterNode {
+ char name[REDIS_CLUSTER_NAMELEN]; /* Node name, hex string, sha1-size */
+ int flags; /* REDIS_NODE_... */
+ unsigned char slots[REDIS_CLUSTER_SLOTS/8]; /* slots handled by this node */
+ int numslaves; /* Number of slave nodes, if this is a master */
+ struct clusterNode **slaves; /* pointers to slave nodes */
+ struct clusterNode *slaveof; /* pointer to the master node */
+ time_t ping_sent; /* Unix time we sent latest ping */
+ time_t pong_received; /* Unix time we received the pong */
+ char *configdigest; /* Configuration digest of this node */
+ time_t configdigest_ts; /* Configuration digest timestamp */
+ char ip[16]; /* Latest known IP address of this node */
+ int port; /* Latest known port of this node */
+ clusterLink *link; /* TCP/IP link with this node */
+};
+typedef struct clusterNode clusterNode;
+
+typedef struct {
+ char *configfile;
+ clusterNode *myself; /* This node */
+ int state; /* REDIS_CLUSTER_OK, REDIS_CLUSTER_FAIL, ... */
+ int node_timeout;
+ dict *nodes; /* Hash table of name -> clusterNode structures */
+ clusterNode *migrating_slots_to[REDIS_CLUSTER_SLOTS];
+ clusterNode *importing_slots_from[REDIS_CLUSTER_SLOTS];
+ clusterNode *slots[REDIS_CLUSTER_SLOTS];
+ zskiplist *slots_to_keys;
+} clusterState;
+
+/* Redis cluster messages header */
+
+/* Note that the PING, PONG and MEET messages are actually the same exact
+ * kind of packet. PONG is the reply to ping, in the extact format as a PING,
+ * while MEET is a special PING that forces the receiver to add the sender
+ * as a node (if it is not already in the list). */
+#define CLUSTERMSG_TYPE_PING 0 /* Ping */
+#define CLUSTERMSG_TYPE_PONG 1 /* Pong (reply to Ping) */
+#define CLUSTERMSG_TYPE_MEET 2 /* Meet "let's join" message */
+#define CLUSTERMSG_TYPE_FAIL 3 /* Mark node xxx as failing */
+#define CLUSTERMSG_TYPE_PUBLISH 4 /* Pub/Sub Publish propatagion */
+
+/* Initially we don't know our "name", but we'll find it once we connect
+ * to the first node, using the getsockname() function. Then we'll use this
+ * address for all the next messages. */
+typedef struct {
+ char nodename[REDIS_CLUSTER_NAMELEN];
+ uint32_t ping_sent;
+ uint32_t pong_received;
+ char ip[16]; /* IP address last time it was seen */
+ uint16_t port; /* port last time it was seen */
+ uint16_t flags;
+ uint32_t notused; /* for 64 bit alignment */
+} clusterMsgDataGossip;
+
+typedef struct {
+ char nodename[REDIS_CLUSTER_NAMELEN];
+} clusterMsgDataFail;
+
+typedef struct {
+ uint32_t channel_len;
+ uint32_t message_len;
+ unsigned char bulk_data[8]; /* defined as 8 just for alignment concerns. */
+} clusterMsgDataPublish;
+
+union clusterMsgData {
+ /* PING, MEET and PONG */
+ struct {
+ /* Array of N clusterMsgDataGossip structures */
+ clusterMsgDataGossip gossip[1];
+ } ping;
+
+ /* FAIL */
+ struct {
+ clusterMsgDataFail about;
+ } fail;
+
+ /* PUBLISH */
+ struct {
+ clusterMsgDataPublish msg;
+ } publish;
+};
+
+typedef struct {
+ uint32_t totlen; /* Total length of this message */
+ uint16_t type; /* Message type */
+ uint16_t count; /* Only used for some kind of messages. */
+ char sender[REDIS_CLUSTER_NAMELEN]; /* Name of the sender node */
+ unsigned char myslots[REDIS_CLUSTER_SLOTS/8];
+ char slaveof[REDIS_CLUSTER_NAMELEN];
+ char configdigest[32];
+ uint16_t port; /* Sender TCP base port */
+ unsigned char state; /* Cluster state from the POV of the sender */
+ unsigned char notused[5]; /* Reserved for future use. For alignment. */
+ union clusterMsgData data;
+} clusterMsg;
+
+/*-----------------------------------------------------------------------------
+ * Global server state
+ *----------------------------------------------------------------------------*/
+
struct redisServer {
- pthread_t mainthread;
+ /* General */
+ redisDb *db;
+ dict *commands; /* Command table hahs table */
+ aeEventLoop *el;
+ /* Networking */
int port;
char *bindaddr;
char *unixsocket;
+ mode_t unixsocketperm;
int ipfd;
int sofd;
- redisDb *db;
- long long dirty; /* changes to DB from the last save */
- long long dirty_before_bgsave; /* used to restore dirty on failed BGSAVE */
+ int cfd;
list *clients;
- dict *commands; /* Command table hahs table */
+ list *slaves, *monitors;
+ char neterr[ANET_ERR_LEN];
/* RDB / AOF loading information */
int loading;
off_t loading_total_bytes;
time_t loading_start_time;
/* Fast pointers to often looked up command */
struct redisCommand *delCommand, *multiCommand;
- list *slaves, *monitors;
- char neterr[ANET_ERR_LEN];
- aeEventLoop *el;
int cronloops; /* number of times the cron function run */
time_t lastsave; /* Unix time of last save succeeede */
/* Fields used only for stats */
long long stat_numcommands; /* number of processed commands */
long long stat_numconnections; /* number of connections received */
long long stat_expiredkeys; /* number of expired keys */
+ long long stat_evictedkeys; /* number of evicted keys (maxmemory) */
long long stat_keyspace_hits; /* number of successful lookups of keys */
long long stat_keyspace_misses; /* number of failed lookups of keys */
+ size_t stat_peak_memory; /* max used memory record */
+ long long stat_fork_time; /* time needed to perform latets fork() */
+ long long stat_rejected_conn; /* clients rejected because of maxclients */
+ list *slowlog;
+ long long slowlog_entry_id;
+ long long slowlog_log_slower_than;
+ unsigned long slowlog_max_len;
/* Configuration */
int verbosity;
- int glueoutputbuf;
int maxidletime;
+ size_t client_max_querybuf_len;
int dbnum;
int daemonize;
int appendonly;
int appendfsync;
int no_appendfsync_on_rewrite;
- int shutdown_asap;
+ int auto_aofrewrite_perc; /* Rewrite AOF if % growth is > M and... */
+ off_t auto_aofrewrite_min_size; /* the AOF file is at least N bytes. */
+ off_t auto_aofrewrite_base_size;/* AOF size on latest startup or rewrite. */
+ off_t appendonly_current_size; /* AOF current size. */
+ int aofrewrite_scheduled; /* Rewrite once BGSAVE terminates. */
+ int shutdown_asap; /* SHUTDOWN needed */
+ int activerehashing;
+ char *requirepass;
+ /* Persistence */
+ long long dirty; /* changes to DB from the last save */
+ long long dirty_before_bgsave; /* used to restore dirty on failed BGSAVE */
time_t lastfsync;
int appendfd;
int appendseldb;
+ time_t aof_flush_postponed_start;
char *pidfile;
pid_t bgsavechildpid;
pid_t bgrewritechildpid;
sds aofbuf; /* AOF buffer, written before entering the event loop */
struct saveparam *saveparams;
int saveparamslen;
- char *logfile;
char *dbfilename;
- char *appendfilename;
- char *requirepass;
int rdbcompression;
- int activerehashing;
+ char *appendfilename;
+ /* Logging */
+ char *logfile;
+ int syslog_enabled;
+ char *syslog_ident;
+ int syslog_facility;
/* Replication related */
int isslave;
/* Slave specific fields */
char *masterauth;
char *masterhost;
int masterport;
+ int repl_ping_slave_period;
+ int repl_timeout;
redisClient *master; /* client that is master for this slave */
+ int repl_syncio_timeout; /* timeout for synchronous I/O calls */
int replstate; /* replication status if the instance is a slave */
off_t repl_transfer_left; /* bytes left reading .rdb */
int repl_transfer_s; /* slave -> master SYNC socket */
char *repl_transfer_tmpfile; /* slave-> master SYNC temp file name */
time_t repl_transfer_lastio; /* unix time of the latest read, for timeout */
int repl_serve_stale_data; /* Serve stale data when link is down? */
+ time_t repl_down_since; /* unix time at which link with master went down */
/* Limits */
unsigned int maxclients;
unsigned long long maxmemory;
int maxmemory_samples;
/* Blocked clients */
unsigned int bpop_blocked_clients;
- unsigned int vm_blocked_clients;
+ list *unblocked_clients; /* list of clients to unblock before next loop */
/* Sort parameters - qsort_r() is only available under BSD so we
* have to take this state global, in order to pass it to sortCompare() */
int sort_desc;
int sort_alpha;
int sort_bypattern;
- /* Virtual memory configuration */
- int vm_enabled;
- char *vm_swap_file;
- off_t vm_page_size;
- off_t vm_pages;
- unsigned long long vm_max_memory;
/* Zip structure config */
size_t hash_max_zipmap_entries;
size_t hash_max_zipmap_value;
size_t list_max_ziplist_entries;
size_t list_max_ziplist_value;
size_t set_max_intset_entries;
- /* Virtual memory state */
- FILE *vm_fp;
- int vm_fd;
- off_t vm_next_page; /* Next probably empty page */
- off_t vm_near_pages; /* Number of pages allocated sequentially */
- unsigned char *vm_bitmap; /* Bitmap of free/used pages */
+ size_t zset_max_ziplist_entries;
+ size_t zset_max_ziplist_value;
time_t unixtime; /* Unix time sampled every second. */
- /* Virtual memory I/O threads stuff */
- /* An I/O thread process an element taken from the io_jobs queue and
- * put the result of the operation in the io_done list. While the
- * job is being processed, it's put on io_processing queue. */
- list *io_newjobs; /* List of VM I/O jobs yet to be processed */
- list *io_processing; /* List of VM I/O jobs being processed */
- list *io_processed; /* List of VM I/O jobs already processed */
- list *io_ready_clients; /* Clients ready to be unblocked. All keys loaded */
- pthread_mutex_t io_mutex; /* lock to access io_jobs/io_done/io_thread_job */
- pthread_mutex_t io_swapfile_mutex; /* So we can lseek + write */
- pthread_attr_t io_threads_attr; /* attributes for threads creation */
- int io_active_threads; /* Number of running I/O threads */
- int vm_max_threads; /* Max number of I/O threads running at the same time */
- /* Our main thread is blocked on the event loop, locking for sockets ready
- * to be read or written, so when a threaded I/O operation is ready to be
- * processed by the main thread, the I/O thread will use a unix pipe to
- * awake the main thread. The followings are the two pipe FDs. */
- int io_ready_pipe_read;
- int io_ready_pipe_write;
- /* Virtual memory stats */
- unsigned long long vm_stats_used_pages;
- unsigned long long vm_stats_swapped_objects;
- unsigned long long vm_stats_swapouts;
- unsigned long long vm_stats_swapins;
/* Pubsub */
dict *pubsub_channels; /* Map channels to list of subscribed clients */
list *pubsub_patterns; /* A list of pubsub_patterns */
/* Misc */
unsigned lruclock:22; /* clock incrementing every minute, for LRU */
unsigned lruclock_padding:10;
+ /* Cluster */
+ int cluster_enabled;
+ clusterState cluster;
+ /* Scripting */
+ lua_State *lua; /* The Lua interpreter. We use just one for all clients */
+ redisClient *lua_client; /* The "fake client" to query Redis from Lua */
+ redisClient *lua_caller; /* The client running EVAL right now, or NULL */
+ dict *lua_scripts; /* A dictionary of SHA1 -> Lua scripts */
+ long long lua_time_limit;
+ long long lua_time_start;
+ int lua_write_dirty; /* True if a write command was called during the
+ execution of the current script. */
+ int lua_random_dirty; /* True if a random command was called during the
+ execution of the current script. */
+ int lua_timedout; /* True if we reached the time limit for script
+ execution. */
+ int lua_kill; /* Kill the script if true. */
+ /* Assert & bug reportign */
+ char *assert_failed;
+ char *assert_file;
+ int assert_line;
+ int bug_report_start; /* True if bug report header already logged. */
};
typedef struct pubsubPattern {
} pubsubPattern;
typedef void redisCommandProc(redisClient *c);
-typedef void redisVmPreloadProc(redisClient *c, struct redisCommand *cmd, int argc, robj **argv);
+typedef int *redisGetKeysProc(struct redisCommand *cmd, robj **argv, int argc, int *numkeys, int flags);
struct redisCommand {
char *name;
redisCommandProc *proc;
int arity;
- int flags;
- /* Use a function to determine which keys need to be loaded
- * in the background prior to executing this command. Takes precedence
- * over vm_firstkey and others, ignored when NULL */
- redisVmPreloadProc *vm_preload_proc;
+ char *sflags; /* Flags as string represenation, one char per flag. */
+ int flags; /* The actual flags, obtained from the 'sflags' field. */
+ /* Use a function to determine keys arguments in a command line.
+ * Used for Redis Cluster redirect. */
+ redisGetKeysProc *getkeys_proc;
/* What keys should be loaded in background when calling this command? */
- int vm_firstkey; /* The first argument that's a key (0 = no keys) */
- int vm_lastkey; /* THe last argument that's a key */
- int vm_keystep; /* The step between first and last key */
+ int firstkey; /* The first argument that's a key (0 = no keys) */
+ int lastkey; /* THe last argument that's a key */
+ int keystep; /* The step between first and last key */
+ long long microseconds, calls;
};
struct redisFunctionSym {
robj *pattern;
} redisSortOperation;
-/* ZSETs use a specialized version of Skiplists */
-typedef struct zskiplistNode {
- robj *obj;
- double score;
- struct zskiplistNode *backward;
- struct zskiplistLevel {
- struct zskiplistNode *forward;
- unsigned int span;
- } level[];
-} zskiplistNode;
-
-typedef struct zskiplist {
- struct zskiplistNode *header, *tail;
- unsigned long length;
- int level;
-} zskiplist;
-
-typedef struct zset {
- dict *dict;
- zskiplist *zsl;
-} zset;
-
-/* VM threaded I/O request message */
-#define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
-#define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
-#define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
-typedef struct iojob {
- int type; /* Request type, REDIS_IOJOB_* */
- redisDb *db;/* Redis database */
- robj *key; /* This I/O request is about swapping this key */
- robj *id; /* Unique identifier of this job:
- this is the object to swap for REDIS_IOREQ_*_SWAP, or the
- vmpointer objct for REDIS_IOREQ_LOAD. */
- robj *val; /* the value to swap for REDIS_IOREQ_*_SWAP, otherwise this
- * field is populated by the I/O thread for REDIS_IOREQ_LOAD. */
- off_t page; /* Swap page where to read/write the object */
- off_t pages; /* Swap pages needed to save object. PREPARE_SWAP return val */
- int canceled; /* True if this command was canceled by blocking side of VM */
- pthread_t thread; /* ID of the thread processing this entry */
-} iojob;
-
/* Structure to hold list iteration abstraction. */
typedef struct {
robj *subject;
extern struct sharedObjectsStruct shared;
extern dictType setDictType;
extern dictType zsetDictType;
+extern dictType clusterNodesDictType;
+extern dictType dbDictType;
extern double R_Zero, R_PosInf, R_NegInf, R_Nan;
dictType hashDictType;
* Functions prototypes
*----------------------------------------------------------------------------*/
+/* Utils */
+long long ustime(void);
+long long mstime(void);
+
/* networking.c -- Networking and Client related operations */
redisClient *createClient(int fd);
void closeTimedoutClients(void);
void freeClient(redisClient *c);
void resetClient(redisClient *c);
void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask);
-void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask);
void addReply(redisClient *c, robj *obj);
void *addDeferredMultiBulkLength(redisClient *c);
void setDeferredMultiBulkLength(redisClient *c, void *node, long length);
void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask);
void addReplyBulk(redisClient *c, robj *obj);
void addReplyBulkCString(redisClient *c, char *s);
+void addReplyBulkCBuffer(redisClient *c, void *p, size_t len);
+void addReplyBulkLongLong(redisClient *c, long long ll);
void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask);
void addReply(redisClient *c, robj *obj);
void addReplySds(redisClient *c, sds s);
void addReplyLongLong(redisClient *c, long long ll);
void addReplyMultiBulkLen(redisClient *c, long length);
void *dupClientReplyValue(void *o);
+void getClientsMaxBuffers(unsigned long *longest_output_list,
+ unsigned long *biggest_input_buffer);
+sds getClientInfoString(redisClient *client);
+sds getAllClientsInfoString(void);
+void rewriteClientCommandVector(redisClient *c, int argc, ...);
+void rewriteClientCommandArgument(redisClient *c, int i, robj *newval);
#ifdef __GNUC__
void addReplyErrorFormat(redisClient *c, const char *fmt, ...)
void unwatchAllKeys(redisClient *c);
void initClientMultiState(redisClient *c);
void freeClientMultiState(redisClient *c);
-void queueMultiCommand(redisClient *c, struct redisCommand *cmd);
+void queueMultiCommand(redisClient *c);
void touchWatchedKey(redisDb *db, robj *key);
void touchWatchedKeysOnFlush(int dbid);
/* Redis object implementation */
void decrRefCount(void *o);
void incrRefCount(robj *o);
+robj *resetRefCount(robj *obj);
void freeStringObject(robj *o);
void freeListObject(robj *o);
void freeSetObject(robj *o);
robj *createObject(int type, void *ptr);
robj *createStringObject(char *ptr, size_t len);
robj *dupStringObject(robj *o);
+int isObjectRepresentableAsLongLong(robj *o, long long *llongval);
robj *tryObjectEncoding(robj *o);
robj *getDecodedObject(robj *o);
size_t stringObjectLen(robj *o);
robj *createStringObjectFromLongLong(long long value);
+robj *createStringObjectFromLongDouble(long double value);
robj *createListObject(void);
robj *createZiplistObject(void);
robj *createSetObject(void);
robj *createIntsetObject(void);
robj *createHashObject(void);
robj *createZsetObject(void);
+robj *createZsetZiplistObject(void);
int getLongFromObjectOrReply(redisClient *c, robj *o, long *target, const char *msg);
int checkType(redisClient *c, robj *o, int type);
int getLongLongFromObjectOrReply(redisClient *c, robj *o, long long *target, const char *msg);
int getDoubleFromObjectOrReply(redisClient *c, robj *o, double *target, const char *msg);
int getLongLongFromObject(robj *o, long long *target);
+int getLongDoubleFromObject(robj *o, long double *target);
+int getLongDoubleFromObjectOrReply(redisClient *c, robj *o, long double *target, const char *msg);
char *strEncoding(int encoding);
int compareStringObjects(robj *a, robj *b);
int equalStringObjects(robj *a, robj *b);
int syncWrite(int fd, char *ptr, ssize_t size, int timeout);
int syncRead(int fd, char *ptr, ssize_t size, int timeout);
int syncReadLine(int fd, char *ptr, ssize_t size, int timeout);
-int fwriteBulkString(FILE *fp, char *s, unsigned long len);
-int fwriteBulkDouble(FILE *fp, double d);
-int fwriteBulkLongLong(FILE *fp, long long l);
-int fwriteBulkObject(FILE *fp, robj *obj);
/* Replication */
void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc);
void replicationFeedMonitors(list *monitors, int dictid, robj **argv, int argc);
-int syncWithMaster(void);
void updateSlavesWaitingBgsave(int bgsaveerr);
void replicationCron(void);
void stopLoading(void);
/* RDB persistence */
-int rdbLoad(char *filename);
-int rdbSaveBackground(char *filename);
-void rdbRemoveTempFile(pid_t childpid);
-int rdbSave(char *filename);
-int rdbSaveObject(FILE *fp, robj *o);
-off_t rdbSavedObjectLen(robj *o);
-off_t rdbSavedObjectPages(robj *o);
-robj *rdbLoadObject(int type, FILE *fp);
-void backgroundSaveDoneHandler(int statloc);
+#include "rdb.h"
/* AOF persistence */
-void flushAppendOnlyFile(void);
+void flushAppendOnlyFile(int force);
void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc);
void aofRemoveTempFile(pid_t childpid);
int rewriteAppendOnlyFileBackground(void);
int loadAppendOnlyFile(char *filename);
void stopAppendOnly(void);
int startAppendOnly(void);
-void backgroundRewriteDoneHandler(int statloc);
+void backgroundRewriteDoneHandler(int exitcode, int bysignal);
/* Sorted sets data type */
+
+/* Struct to hold a inclusive/exclusive range spec. */
+typedef struct {
+ double min, max;
+ int minex, maxex; /* are min or max exclusive? */
+} zrangespec;
+
zskiplist *zslCreate(void);
void zslFree(zskiplist *zsl);
zskiplistNode *zslInsert(zskiplist *zsl, double score, robj *obj);
+unsigned char *zzlInsert(unsigned char *zl, robj *ele, double score);
+int zslDelete(zskiplist *zsl, double score, robj *obj);
+zskiplistNode *zslFirstInRange(zskiplist *zsl, zrangespec range);
+double zzlGetScore(unsigned char *sptr);
+void zzlNext(unsigned char *zl, unsigned char **eptr, unsigned char **sptr);
+void zzlPrev(unsigned char *zl, unsigned char **eptr, unsigned char **sptr);
+unsigned int zsetLength(robj *zobj);
+void zsetConvert(robj *zobj, int encoding);
/* Core functions */
void freeMemoryIfNeeded(void);
int processCommand(redisClient *c);
-void setupSigSegvAction(void);
+void setupSignalHandlers(void);
struct redisCommand *lookupCommand(sds name);
struct redisCommand *lookupCommandByCString(char *s);
-void call(redisClient *c, struct redisCommand *cmd);
+void call(redisClient *c);
int prepareForShutdown();
void redisLog(int level, const char *fmt, ...);
+void redisLogRaw(int level, const char *msg);
void usage();
void updateDictResizePolicy(void);
int htNeedsResize(dict *dict);
void oom(const char *msg);
void populateCommandTable(void);
-
-/* Virtual Memory */
-void vmInit(void);
-void vmMarkPagesFree(off_t page, off_t count);
-robj *vmLoadObject(robj *o);
-robj *vmPreviewObject(robj *o);
-int vmSwapOneObjectBlocking(void);
-int vmSwapOneObjectThreaded(void);
-int vmCanSwapOut(void);
-void vmThreadedIOCompletedJob(aeEventLoop *el, int fd, void *privdata, int mask);
-void vmCancelThreadedIOJob(robj *o);
-void lockThreadedIO(void);
-void unlockThreadedIO(void);
-int vmSwapObjectThreaded(robj *key, robj *val, redisDb *db);
-void freeIOJob(iojob *j);
-void queueIOJob(iojob *j);
-int vmWriteObjectOnSwap(robj *o, off_t page);
-robj *vmReadObjectFromSwap(off_t page, int type);
-void waitEmptyIOJobsQueue(void);
-void vmReopenSwapFile(void);
-int vmFreePage(off_t page);
-void zunionInterBlockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv);
-void execBlockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv);
-int blockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd);
-int dontWaitForSwappedKey(redisClient *c, robj *key);
-void handleClientsBlockedOnSwappedKey(redisDb *db, robj *key);
-vmpointer *vmSwapObjectBlocking(robj *val);
+void resetCommandTableStats(void);
/* Set data type */
robj *setTypeCreate(robj *value);
int setTypeIsMember(robj *subject, robj *value);
setTypeIterator *setTypeInitIterator(robj *subject);
void setTypeReleaseIterator(setTypeIterator *si);
-robj *setTypeNext(setTypeIterator *si);
-robj *setTypeRandomElement(robj *subject);
+int setTypeNext(setTypeIterator *si, robj **objele, int64_t *llele);
+robj *setTypeNextObject(setTypeIterator *si);
+int setTypeRandomElement(robj *setobj, robj **objele, int64_t *llele);
unsigned long setTypeSize(robj *subject);
void setTypeConvert(robj *subject, int enc);
void convertToRealHash(robj *o);
void hashTypeTryConversion(robj *subject, robj **argv, int start, int end);
void hashTypeTryObjectEncoding(robj *subject, robj **o1, robj **o2);
-robj *hashTypeGet(robj *o, robj *key);
+int hashTypeGet(robj *o, robj *key, robj **objval, unsigned char **v, unsigned int *vlen);
+robj *hashTypeGetObject(robj *o, robj *key);
int hashTypeExists(robj *o, robj *key);
int hashTypeSet(robj *o, robj *key, robj *value);
int hashTypeDelete(robj *o, robj *key);
hashTypeIterator *hashTypeInitIterator(robj *subject);
void hashTypeReleaseIterator(hashTypeIterator *hi);
int hashTypeNext(hashTypeIterator *hi);
-robj *hashTypeCurrent(hashTypeIterator *hi, int what);
+int hashTypeCurrent(hashTypeIterator *hi, int what, robj **objval, unsigned char **v, unsigned int *vlen);
+robj *hashTypeCurrentObject(hashTypeIterator *hi, int what);
robj *hashTypeLookupWriteOrCreate(redisClient *c, robj *key);
/* Pub / Sub */
int pubsubUnsubscribeAllPatterns(redisClient *c, int notify);
void freePubsubPattern(void *p);
int listMatchPubsubPattern(void *a, void *b);
-
-/* Utility functions */
-int stringmatchlen(const char *pattern, int patternLen,
- const char *string, int stringLen, int nocase);
-int stringmatch(const char *pattern, const char *string, int nocase);
-long long memtoll(const char *p, int *err);
-int ll2string(char *s, size_t len, long long value);
-int isStringRepresentableAsLong(sds s, long *longval);
-int isStringRepresentableAsLongLong(sds s, long long *longval);
-int isObjectRepresentableAsLongLong(robj *o, long long *llongval);
+int pubsubPublishMessage(robj *channel, robj *message);
/* Configuration */
void loadServerConfig(char *filename);
int removeExpire(redisDb *db, robj *key);
void propagateExpire(redisDb *db, robj *key);
int expireIfNeeded(redisDb *db, robj *key);
-time_t getExpire(redisDb *db, robj *key);
-void setExpire(redisDb *db, robj *key, time_t when);
+long long getExpire(redisDb *db, robj *key);
+void setExpire(redisDb *db, robj *key, long long when);
robj *lookupKey(redisDb *db, robj *key);
robj *lookupKeyRead(redisDb *db, robj *key);
robj *lookupKeyWrite(redisDb *db, robj *key);
robj *lookupKeyReadOrReply(redisClient *c, robj *key, robj *reply);
robj *lookupKeyWriteOrReply(redisClient *c, robj *key, robj *reply);
-int dbAdd(redisDb *db, robj *key, robj *val);
-int dbReplace(redisDb *db, robj *key, robj *val);
+void dbAdd(redisDb *db, robj *key, robj *val);
+void dbOverwrite(redisDb *db, robj *key, robj *val);
+void setKey(redisDb *db, robj *key, robj *val);
int dbExists(redisDb *db, robj *key);
robj *dbRandomKey(redisDb *db);
int dbDelete(redisDb *db, robj *key);
long long emptyDb();
int selectDb(redisClient *c, int id);
+void signalModifiedKey(redisDb *db, robj *key);
+void signalFlushedDb(int dbid);
+unsigned int GetKeysInSlot(unsigned int hashslot, robj **keys, unsigned int count);
+
+/* API to get key arguments from commands */
+#define REDIS_GETKEYS_ALL 0
+#define REDIS_GETKEYS_PRELOAD 1
+int *getKeysFromCommand(struct redisCommand *cmd, robj **argv, int argc, int *numkeys, int flags);
+void getKeysFreeResult(int *result);
+int *noPreloadGetKeys(struct redisCommand *cmd,robj **argv, int argc, int *numkeys, int flags);
+int *renameGetKeys(struct redisCommand *cmd,robj **argv, int argc, int *numkeys, int flags);
+int *zunionInterGetKeys(struct redisCommand *cmd,robj **argv, int argc, int *numkeys, int flags);
+
+/* Cluster */
+void clusterInit(void);
+unsigned short crc16(const char *buf, int len);
+unsigned int keyHashSlot(char *key, int keylen);
+clusterNode *createClusterNode(char *nodename, int flags);
+int clusterAddNode(clusterNode *node);
+void clusterCron(void);
+clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, int *ask);
+void clusterPropagatePublish(robj *channel, robj *message);
+
+/* Scripting */
+void scriptingInit(void);
/* Git SHA1 */
char *redisGitSHA1(void);
void setCommand(redisClient *c);
void setnxCommand(redisClient *c);
void setexCommand(redisClient *c);
+void psetexCommand(redisClient *c);
void getCommand(redisClient *c);
void delCommand(redisClient *c);
void existsCommand(redisClient *c);
+void setbitCommand(redisClient *c);
+void getbitCommand(redisClient *c);
+void setrangeCommand(redisClient *c);
+void getrangeCommand(redisClient *c);
void incrCommand(redisClient *c);
void decrCommand(redisClient *c);
void incrbyCommand(redisClient *c);
void decrbyCommand(redisClient *c);
+void incrbyfloatCommand(redisClient *c);
void selectCommand(redisClient *c);
void randomkeyCommand(redisClient *c);
void keysCommand(redisClient *c);
void monitorCommand(redisClient *c);
void expireCommand(redisClient *c);
void expireatCommand(redisClient *c);
+void pexpireCommand(redisClient *c);
+void pexpireatCommand(redisClient *c);
void getsetCommand(redisClient *c);
void ttlCommand(redisClient *c);
+void pttlCommand(redisClient *c);
void persistCommand(redisClient *c);
void slaveofCommand(redisClient *c);
void debugCommand(redisClient *c);
void brpopCommand(redisClient *c);
void brpoplpushCommand(redisClient *c);
void appendCommand(redisClient *c);
-void substrCommand(redisClient *c);
void strlenCommand(redisClient *c);
void zrankCommand(redisClient *c);
void zrevrankCommand(redisClient *c);
void hexistsCommand(redisClient *c);
void configCommand(redisClient *c);
void hincrbyCommand(redisClient *c);
+void hincrbyfloatCommand(redisClient *c);
void subscribeCommand(redisClient *c);
void unsubscribeCommand(redisClient *c);
void psubscribeCommand(redisClient *c);
void publishCommand(redisClient *c);
void watchCommand(redisClient *c);
void unwatchCommand(redisClient *c);
+void clusterCommand(redisClient *c);
+void restoreCommand(redisClient *c);
+void migrateCommand(redisClient *c);
+void askingCommand(redisClient *c);
+void dumpCommand(redisClient *c);
+void objectCommand(redisClient *c);
+void clientCommand(redisClient *c);
+void evalCommand(redisClient *c);
+void evalShaCommand(redisClient *c);
+void scriptCommand(redisClient *c);
#if defined(__GNUC__)
void *calloc(size_t count, size_t size) __attribute__ ((deprecated));
void *realloc(void *ptr, size_t size) __attribute__ ((deprecated));
#endif
+/* Debugging stuff */
+void _redisAssertWithInfo(redisClient *c, robj *o, char *estr, char *file, int line);
+void _redisAssert(char *estr, char *file, int line);
+void _redisPanic(char *msg, char *file, int line);
+void bugReportStart(void);
+
#endif