X-Git-Url: https://git.saurik.com/redis.git/blobdiff_plain/5ef640986bc507079506bf77fd22ab31ea3f4fc4..d4a3cfed9c95e0ca5f8bcc771f8844ad17895c69:/src/redis.h diff --git a/src/redis.h b/src/redis.h index 1557e260..3d1e81aa 100644 --- a/src/redis.h +++ b/src/redis.h @@ -18,17 +18,20 @@ #include #include #include +#include +#include -#include "ae.h" /* Event driven programming library */ -#include "sds.h" /* Dynamic safe strings */ -#include "dict.h" /* Hash tables */ -#include "adlist.h" /* Linked lists */ +#include "ae.h" /* Event driven programming library */ +#include "sds.h" /* Dynamic safe strings */ +#include "dict.h" /* Hash tables */ +#include "adlist.h" /* Linked lists */ #include "zmalloc.h" /* total memory usage aware version of malloc/free */ -#include "anet.h" /* Networking the easy way */ -#include "zipmap.h" /* Compact string -> string data structure */ +#include "anet.h" /* Networking the easy way */ +#include "zipmap.h" /* Compact string -> string data structure */ #include "ziplist.h" /* Compact list data structure */ -#include "intset.h" /* Compact integer set structure */ -#include "version.h" +#include "intset.h" /* Compact integer set structure */ +#include "version.h" /* Version macro */ +#include "util.h" /* Misc functions useful in many places */ /* Error codes */ #define REDIS_OK 0 @@ -36,10 +39,9 @@ /* Static server configuration */ #define REDIS_SERVERPORT 6379 /* TCP port */ -#define REDIS_MAXIDLETIME (60*5) /* default client timeout */ -#define REDIS_IOBUF_LEN 1024 +#define REDIS_MAXIDLETIME 0 /* default client timeout: infinite */ +#define REDIS_IOBUF_LEN (1024*16) #define REDIS_LOADBUF_LEN 1024 -#define REDIS_STATIC_ARGS 8 #define REDIS_DEFAULT_DBNUM 16 #define REDIS_CONFIGLINE_MAX 1024 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */ @@ -47,26 +49,31 @@ #define REDIS_MAX_WRITE_PER_EVENT (1024*64) #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */ #define REDIS_SHARED_INTEGERS 10000 -#define REDIS_REPLY_CHUNK_BYTES (5*1500) /* 5 TCP packets with default MTU */ +#define REDIS_REPLY_CHUNK_BYTES (16*1024) /* 16k output buffer */ #define REDIS_MAX_LOGMSG_LEN 1024 /* Default maximum length of syslog messages */ +#define REDIS_AUTO_AOFREWRITE_PERC 100 +#define REDIS_AUTO_AOFREWRITE_MIN_SIZE (1024*1024) +#define REDIS_SLOWLOG_LOG_SLOWER_THAN 10000 +#define REDIS_SLOWLOG_MAX_LEN 64 +#define REDIS_MAX_CLIENTS 10000 -/* If more then REDIS_WRITEV_THRESHOLD write packets are pending use writev */ -#define REDIS_WRITEV_THRESHOLD 3 -/* Max number of iovecs used for each writev call */ -#define REDIS_WRITEV_IOVEC_COUNT 256 +#define REDIS_REPL_TIMEOUT 60 +#define REDIS_REPL_PING_SLAVE_PERIOD 10 +#define REDIS_MBULK_BIG_ARG (1024*32) /* Hash table parameters */ #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */ -/* Command flags: - * REDIS_CMD_DENYOOM: - * Commands marked with this flag will return an error when 'maxmemory' is - * set and the server is using more than 'maxmemory' bytes of memory. - * In short: commands with this flag are denied on low memory conditions. - * REDIS_CMD_FORCE_REPLICATION: - * Force replication even if dirty is 0. */ -#define REDIS_CMD_DENYOOM 4 -#define REDIS_CMD_FORCE_REPLICATION 8 +/* Command flags. Please check the command table defined in the redis.c file + * for more information about the meaning of every flag. */ +#define REDIS_CMD_WRITE 1 /* "w" flag */ +#define REDIS_CMD_READONLY 2 /* "r" flag */ +#define REDIS_CMD_DENYOOM 4 /* "m" flag */ +#define REDIS_CMD_FORCE_REPLICATION 8 /* "f" flag */ +#define REDIS_CMD_ADMIN 16 /* "a" flag */ +#define REDIS_CMD_PUBSUB 32 /* "p" flag */ +#define REDIS_CMD_NOSCRIPT 64 /* "s" flag */ +#define REDIS_CMD_RANDOM 128 /* "R" flag */ /* Object types */ #define REDIS_STRING 0 @@ -86,11 +93,7 @@ #define REDIS_ENCODING_LINKEDLIST 4 /* Encoded as regular linked list */ #define REDIS_ENCODING_ZIPLIST 5 /* Encoded as ziplist */ #define REDIS_ENCODING_INTSET 6 /* Encoded as intset */ - -/* Object types only used for dumping to disk */ -#define REDIS_EXPIRETIME 253 -#define REDIS_SELECTDB 254 -#define REDIS_EOF 255 +#define REDIS_ENCODING_SKIPLIST 7 /* Encoded as skiplist */ /* Defines related to the dump file format. To store 32 bits lengths for short * keys requires a lot of space, so we check the most significant 2 bits of @@ -119,33 +122,32 @@ #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */ #define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */ -/* Disk store cache object->storage values */ -#define REDIS_DS_MEMORY 0 /* The object is on memory */ -#define REDIS_DS_DIRTY 1 /* The object was modified */ -#define REDIS_DS_SAVING 2 /* There is an IO Job created for this obj. */ - -#define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1 -#define REDIS_THREAD_STACK_SIZE (1024*1024*4) - /* Client flags */ #define REDIS_SLAVE 1 /* This client is a slave server */ #define REDIS_MASTER 2 /* This client is a master server */ #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */ #define REDIS_MULTI 8 /* This client is in a MULTI context */ #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */ -#define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */ #define REDIS_DIRTY_CAS 64 /* Watched keys modified. EXEC will fail. */ #define REDIS_CLOSE_AFTER_REPLY 128 /* Close after writing entire reply. */ +#define REDIS_UNBLOCKED 256 /* This client was unblocked and is stored in + server.unblocked_clients */ +#define REDIS_LUA_CLIENT 512 /* This is a non connected client used by Lua */ +#define REDIS_ASKING 1024 /* Client issued the ASKING command */ /* Client request types */ #define REDIS_REQ_INLINE 1 #define REDIS_REQ_MULTIBULK 2 /* Slave replication state - slave side */ -#define REDIS_REPL_NONE 0 /* No active replication */ -#define REDIS_REPL_CONNECT 1 /* Must connect to master */ -#define REDIS_REPL_TRANSFER 2 /* Receiving .rdb from master */ -#define REDIS_REPL_CONNECTED 3 /* Connected to master */ +#define REDIS_REPL_NONE 0 /* No active replication */ +#define REDIS_REPL_CONNECT 1 /* Must connect to master */ +#define REDIS_REPL_CONNECTING 2 /* Connecting to master */ +#define REDIS_REPL_TRANSFER 3 /* Receiving .rdb from master */ +#define REDIS_REPL_CONNECTED 4 /* Connected to master */ + +/* Synchronous read timeout - slave side */ +#define REDIS_REPL_SYNCIO_TIMEOUT 5 /* Slave replication state - from the point of view of master * Note that in SEND_BULK and ONLINE state the slave receives new updates @@ -171,6 +173,7 @@ #define REDIS_VERBOSE 1 #define REDIS_NOTICE 2 #define REDIS_WARNING 3 +#define REDIS_LOG_RAW (1<<10) /* Modifier to log without timestamp */ /* Anti-warning macro... */ #define REDIS_NOTUSED(V) ((void) V) @@ -184,11 +187,13 @@ #define APPENDFSYNC_EVERYSEC 2 /* Zip structure related defaults */ -#define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64 -#define REDIS_HASH_MAX_ZIPMAP_VALUE 512 +#define REDIS_HASH_MAX_ZIPMAP_ENTRIES 512 +#define REDIS_HASH_MAX_ZIPMAP_VALUE 64 #define REDIS_LIST_MAX_ZIPLIST_ENTRIES 512 #define REDIS_LIST_MAX_ZIPLIST_VALUE 64 #define REDIS_SET_MAX_INTSET_ENTRIES 512 +#define REDIS_ZSET_MAX_ZIPLIST_ENTRIES 128 +#define REDIS_ZSET_MAX_ZIPLIST_VALUE 64 /* Sets operations codes */ #define REDIS_OP_UNION 0 @@ -203,11 +208,17 @@ #define REDIS_MAXMEMORY_ALLKEYS_RANDOM 4 #define REDIS_MAXMEMORY_NO_EVICTION 5 +/* Scripting */ +#define REDIS_LUA_TIME_LIMIT 5000 /* milliseconds */ + +/* Units */ +#define UNIT_SECONDS 0 +#define UNIT_MILLISECONDS 1 + /* We can print the stacktrace, so our assert is defined this way: */ +#define redisAssertWithInfo(_c,_o,_e) ((_e)?(void)0 : (_redisAssertWithInfo(_c,_o,#_e,__FILE__,__LINE__),_exit(1))) #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1))) #define redisPanic(_e) _redisPanic(#_e,__FILE__,__LINE__),_exit(1) -void _redisAssert(char *estr, char *file, int line); -void _redisPanic(char *msg, char *file, int line); /*----------------------------------------------------------------------------- * Data types @@ -220,38 +231,13 @@ void _redisPanic(char *msg, char *file, int line); #define REDIS_LRU_CLOCK_RESOLUTION 10 /* LRU clock resolution in seconds */ typedef struct redisObject { unsigned type:4; - unsigned storage:2; /* REDIS_VM_MEMORY or REDIS_VM_SWAPPING */ + unsigned notused:2; /* Not used */ unsigned encoding:4; unsigned lru:22; /* lru time (relative to server.lruclock) */ int refcount; void *ptr; - /* VM fields are only allocated if VM is active, otherwise the - * object allocation function will just allocate - * sizeof(redisObjct) minus sizeof(redisObjectVM), so using - * Redis without VM active will not have any overhead. */ } robj; -/* The VM pointer structure - identifies an object in the swap file. - * - * This object is stored in place of the value - * object in the main key->value hash table representing a database. - * Note that the first fields (type, storage) are the same as the redisObject - * structure so that vmPointer strucuters can be accessed even when casted - * as redisObject structures. - * - * This is useful as we don't know if a value object is or not on disk, but we - * are always able to read obj->storage to check this. For vmPointer - * structures "type" is set to REDIS_VMPOINTER (even if without this field - * is still possible to check the kind of object from the value of 'storage').*/ -typedef struct vmPointer { - unsigned type:4; - unsigned storage:2; /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */ - unsigned notused:26; - unsigned int vtype; /* type of the object stored in the swap file */ - off_t page; /* the page at witch the object is stored on disk */ - off_t usedpages; /* number of pages used on disk */ -} vmpointer; - /* Macro used to initalize a Redis object allocated on the stack. * Note that this macro is taken near the structure definition to make sure * we'll update it when the structure is changed, to avoid bugs like @@ -261,14 +247,12 @@ typedef struct vmPointer { _var.type = REDIS_STRING; \ _var.encoding = REDIS_ENCODING_RAW; \ _var.ptr = _ptr; \ - _var.storage = REDIS_DS_MEMORY; \ } while(0); typedef struct redisDb { dict *dict; /* The keyspace for this DB */ dict *expires; /* Timeout of keys with a timeout set */ dict *blocking_keys; /* Keys with clients waiting for data (BLPOP) */ - dict *io_keys; /* Keys with clients waiting for VM I/O */ dict *watched_keys; /* WATCHED keys for MULTI/EXEC CAS */ int id; } redisDb; @@ -304,6 +288,7 @@ typedef struct redisClient { sds querybuf; int argc; robj **argv; + struct redisCommand *cmd; int reqtype; int multibulklen; /* number of multi bulk arguments left to read */ long bulklen; /* length of bulk argument in multi bulk request */ @@ -339,7 +324,7 @@ struct sharedObjectsStruct { robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *cnegone, *pong, *space, *colon, *nullbulk, *nullmultibulk, *queued, *emptymultibulk, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr, - *outofrangeerr, *loadingerr, *plus, + *outofrangeerr, *noscripterr, *loadingerr, *slowscripterr, *plus, *select0, *select1, *select2, *select3, *select4, *select5, *select6, *select7, *select8, *select9, *messagebulk, *pmessagebulk, *subscribebulk, *unsubscribebulk, *mbulk3, @@ -347,19 +332,176 @@ struct sharedObjectsStruct { *integers[REDIS_SHARED_INTEGERS]; }; -/* Global server state structure */ +/* ZSETs use a specialized version of Skiplists */ +typedef struct zskiplistNode { + robj *obj; + double score; + struct zskiplistNode *backward; + struct zskiplistLevel { + struct zskiplistNode *forward; + unsigned int span; + } level[]; +} zskiplistNode; + +typedef struct zskiplist { + struct zskiplistNode *header, *tail; + unsigned long length; + int level; +} zskiplist; + +typedef struct zset { + dict *dict; + zskiplist *zsl; +} zset; + +/*----------------------------------------------------------------------------- + * Redis cluster data structures + *----------------------------------------------------------------------------*/ + +#define REDIS_CLUSTER_SLOTS 4096 +#define REDIS_CLUSTER_OK 0 /* Everything looks ok */ +#define REDIS_CLUSTER_FAIL 1 /* The cluster can't work */ +#define REDIS_CLUSTER_NEEDHELP 2 /* The cluster works, but needs some help */ +#define REDIS_CLUSTER_NAMELEN 40 /* sha1 hex length */ +#define REDIS_CLUSTER_PORT_INCR 10000 /* Cluster port = baseport + PORT_INCR */ + +struct clusterNode; + +/* clusterLink encapsulates everything needed to talk with a remote node. */ +typedef struct clusterLink { + int fd; /* TCP socket file descriptor */ + sds sndbuf; /* Packet send buffer */ + sds rcvbuf; /* Packet reception buffer */ + struct clusterNode *node; /* Node related to this link if any, or NULL */ +} clusterLink; + +/* Node flags */ +#define REDIS_NODE_MASTER 1 /* The node is a master */ +#define REDIS_NODE_SLAVE 2 /* The node is a slave */ +#define REDIS_NODE_PFAIL 4 /* Failure? Need acknowledge */ +#define REDIS_NODE_FAIL 8 /* The node is believed to be malfunctioning */ +#define REDIS_NODE_MYSELF 16 /* This node is myself */ +#define REDIS_NODE_HANDSHAKE 32 /* We have still to exchange the first ping */ +#define REDIS_NODE_NOADDR 64 /* We don't know the address of this node */ +#define REDIS_NODE_MEET 128 /* Send a MEET message to this node */ +#define REDIS_NODE_NULL_NAME "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" + +struct clusterNode { + char name[REDIS_CLUSTER_NAMELEN]; /* Node name, hex string, sha1-size */ + int flags; /* REDIS_NODE_... */ + unsigned char slots[REDIS_CLUSTER_SLOTS/8]; /* slots handled by this node */ + int numslaves; /* Number of slave nodes, if this is a master */ + struct clusterNode **slaves; /* pointers to slave nodes */ + struct clusterNode *slaveof; /* pointer to the master node */ + time_t ping_sent; /* Unix time we sent latest ping */ + time_t pong_received; /* Unix time we received the pong */ + char *configdigest; /* Configuration digest of this node */ + time_t configdigest_ts; /* Configuration digest timestamp */ + char ip[16]; /* Latest known IP address of this node */ + int port; /* Latest known port of this node */ + clusterLink *link; /* TCP/IP link with this node */ +}; +typedef struct clusterNode clusterNode; + +typedef struct { + char *configfile; + clusterNode *myself; /* This node */ + int state; /* REDIS_CLUSTER_OK, REDIS_CLUSTER_FAIL, ... */ + int node_timeout; + dict *nodes; /* Hash table of name -> clusterNode structures */ + clusterNode *migrating_slots_to[REDIS_CLUSTER_SLOTS]; + clusterNode *importing_slots_from[REDIS_CLUSTER_SLOTS]; + clusterNode *slots[REDIS_CLUSTER_SLOTS]; + zskiplist *slots_to_keys; +} clusterState; + +/* Redis cluster messages header */ + +/* Note that the PING, PONG and MEET messages are actually the same exact + * kind of packet. PONG is the reply to ping, in the extact format as a PING, + * while MEET is a special PING that forces the receiver to add the sender + * as a node (if it is not already in the list). */ +#define CLUSTERMSG_TYPE_PING 0 /* Ping */ +#define CLUSTERMSG_TYPE_PONG 1 /* Pong (reply to Ping) */ +#define CLUSTERMSG_TYPE_MEET 2 /* Meet "let's join" message */ +#define CLUSTERMSG_TYPE_FAIL 3 /* Mark node xxx as failing */ +#define CLUSTERMSG_TYPE_PUBLISH 4 /* Pub/Sub Publish propatagion */ + +/* Initially we don't know our "name", but we'll find it once we connect + * to the first node, using the getsockname() function. Then we'll use this + * address for all the next messages. */ +typedef struct { + char nodename[REDIS_CLUSTER_NAMELEN]; + uint32_t ping_sent; + uint32_t pong_received; + char ip[16]; /* IP address last time it was seen */ + uint16_t port; /* port last time it was seen */ + uint16_t flags; + uint32_t notused; /* for 64 bit alignment */ +} clusterMsgDataGossip; + +typedef struct { + char nodename[REDIS_CLUSTER_NAMELEN]; +} clusterMsgDataFail; + +typedef struct { + uint32_t channel_len; + uint32_t message_len; + unsigned char bulk_data[8]; /* defined as 8 just for alignment concerns. */ +} clusterMsgDataPublish; + +union clusterMsgData { + /* PING, MEET and PONG */ + struct { + /* Array of N clusterMsgDataGossip structures */ + clusterMsgDataGossip gossip[1]; + } ping; + + /* FAIL */ + struct { + clusterMsgDataFail about; + } fail; + + /* PUBLISH */ + struct { + clusterMsgDataPublish msg; + } publish; +}; + +typedef struct { + uint32_t totlen; /* Total length of this message */ + uint16_t type; /* Message type */ + uint16_t count; /* Only used for some kind of messages. */ + char sender[REDIS_CLUSTER_NAMELEN]; /* Name of the sender node */ + unsigned char myslots[REDIS_CLUSTER_SLOTS/8]; + char slaveof[REDIS_CLUSTER_NAMELEN]; + char configdigest[32]; + uint16_t port; /* Sender TCP base port */ + unsigned char state; /* Cluster state from the POV of the sender */ + unsigned char notused[5]; /* Reserved for future use. For alignment. */ + union clusterMsgData data; +} clusterMsg; + +/*----------------------------------------------------------------------------- + * Global server state + *----------------------------------------------------------------------------*/ + struct redisServer { - pthread_t mainthread; + /* General */ + redisDb *db; + dict *commands; /* Command table hahs table */ + aeEventLoop *el; + /* Networking */ int port; char *bindaddr; char *unixsocket; + mode_t unixsocketperm; int ipfd; int sofd; - redisDb *db; - long long dirty; /* changes to DB from the last save */ - long long dirty_before_bgsave; /* used to restore dirty on failed BGSAVE */ + int cfd; list *clients; - dict *commands; /* Command table hahs table */ + list *slaves, *monitors; + char neterr[ANET_ERR_LEN]; /* RDB / AOF loading information */ int loading; off_t loading_total_bytes; @@ -367,9 +509,6 @@ struct redisServer { time_t loading_start_time; /* Fast pointers to often looked up command */ struct redisCommand *delCommand, *multiCommand; - list *slaves, *monitors; - char neterr[ANET_ERR_LEN]; - aeEventLoop *el; int cronloops; /* number of times the cron function run */ time_t lastsave; /* Unix time of last save succeeede */ /* Fields used only for stats */ @@ -380,19 +519,35 @@ struct redisServer { long long stat_evictedkeys; /* number of evicted keys (maxmemory) */ long long stat_keyspace_hits; /* number of successful lookups of keys */ long long stat_keyspace_misses; /* number of failed lookups of keys */ + size_t stat_peak_memory; /* max used memory record */ + long long stat_fork_time; /* time needed to perform latets fork() */ + list *slowlog; + long long slowlog_entry_id; + long long slowlog_log_slower_than; + unsigned long slowlog_max_len; /* Configuration */ int verbosity; - int glueoutputbuf; int maxidletime; int dbnum; int daemonize; int appendonly; int appendfsync; int no_appendfsync_on_rewrite; + int auto_aofrewrite_perc; /* Rewrite AOF if % growth is > M and... */ + off_t auto_aofrewrite_min_size; /* the AOF file is at least N bytes. */ + off_t auto_aofrewrite_base_size;/* AOF size on latest startup or rewrite. */ + off_t appendonly_current_size; /* AOF current size. */ + int aofrewrite_scheduled; /* Rewrite once BGSAVE terminates. */ int shutdown_asap; + int activerehashing; + char *requirepass; + /* Persistence */ + long long dirty; /* changes to DB from the last save */ + long long dirty_before_bgsave; /* used to restore dirty on failed BGSAVE */ time_t lastfsync; int appendfd; int appendseldb; + time_t aof_flush_postponed_start; char *pidfile; pid_t bgsavechildpid; pid_t bgrewritechildpid; @@ -400,22 +555,24 @@ struct redisServer { sds aofbuf; /* AOF buffer, written before entering the event loop */ struct saveparam *saveparams; int saveparamslen; + char *dbfilename; + int rdbcompression; + char *appendfilename; + /* Logging */ char *logfile; int syslog_enabled; char *syslog_ident; int syslog_facility; - char *dbfilename; - char *appendfilename; - char *requirepass; - int rdbcompression; - int activerehashing; /* Replication related */ int isslave; /* Slave specific fields */ char *masterauth; char *masterhost; int masterport; + int repl_ping_slave_period; + int repl_timeout; redisClient *master; /* client that is master for this slave */ + int repl_syncio_timeout; /* timeout for synchronous I/O calls */ int replstate; /* replication status if the instance is a slave */ off_t repl_transfer_left; /* bytes left reading .rdb */ int repl_transfer_s; /* slave -> master SYNC socket */ @@ -423,6 +580,7 @@ struct redisServer { char *repl_transfer_tmpfile; /* slave-> master SYNC temp file name */ time_t repl_transfer_lastio; /* unix time of the latest read, for timeout */ int repl_serve_stale_data; /* Serve stale data when link is down? */ + time_t repl_down_since; /* unix time at which link with master went down */ /* Limits */ unsigned int maxclients; unsigned long long maxmemory; @@ -430,54 +588,40 @@ struct redisServer { int maxmemory_samples; /* Blocked clients */ unsigned int bpop_blocked_clients; - unsigned int cache_blocked_clients; - list *unblocked_clients; + list *unblocked_clients; /* list of clients to unblock before next loop */ /* Sort parameters - qsort_r() is only available under BSD so we * have to take this state global, in order to pass it to sortCompare() */ int sort_desc; int sort_alpha; int sort_bypattern; - /* Virtual memory configuration */ - int ds_enabled; /* backend disk in redis.conf */ - char *ds_path; /* location of the disk store on disk */ - unsigned long long cache_max_memory; /* Zip structure config */ size_t hash_max_zipmap_entries; size_t hash_max_zipmap_value; size_t list_max_ziplist_entries; size_t list_max_ziplist_value; size_t set_max_intset_entries; + size_t zset_max_ziplist_entries; + size_t zset_max_ziplist_value; time_t unixtime; /* Unix time sampled every second. */ - /* Virtual memory I/O threads stuff */ - /* An I/O thread process an element taken from the io_jobs queue and - * put the result of the operation in the io_done list. While the - * job is being processed, it's put on io_processing queue. */ - list *io_newjobs; /* List of VM I/O jobs yet to be processed */ - list *io_processing; /* List of VM I/O jobs being processed */ - list *io_processed; /* List of VM I/O jobs already processed */ - list *io_ready_clients; /* Clients ready to be unblocked. All keys loaded */ - pthread_mutex_t io_mutex; /* lock to access io_jobs/io_done/io_thread_job */ - pthread_mutex_t io_swapfile_mutex; /* So we can lseek + write */ - pthread_attr_t io_threads_attr; /* attributes for threads creation */ - int io_active_threads; /* Number of running I/O threads */ - int vm_max_threads; /* Max number of I/O threads running at the same time */ - /* Our main thread is blocked on the event loop, locking for sockets ready - * to be read or written, so when a threaded I/O operation is ready to be - * processed by the main thread, the I/O thread will use a unix pipe to - * awake the main thread. The followings are the two pipe FDs. */ - int io_ready_pipe_read; - int io_ready_pipe_write; - /* Virtual memory stats */ - unsigned long long vm_stats_used_pages; - unsigned long long vm_stats_swapped_objects; - unsigned long long vm_stats_swapouts; - unsigned long long vm_stats_swapins; /* Pubsub */ dict *pubsub_channels; /* Map channels to list of subscribed clients */ list *pubsub_patterns; /* A list of pubsub_patterns */ /* Misc */ unsigned lruclock:22; /* clock incrementing every minute, for LRU */ unsigned lruclock_padding:10; + /* Cluster */ + int cluster_enabled; + clusterState cluster; + /* Scripting */ + lua_State *lua; /* The Lua interpreter. We use just one for all clients */ + redisClient *lua_client; /* The "fake client" to query Redis from Lua */ + dict *lua_scripts; /* A dictionary of SHA1 -> Lua scripts */ + long long lua_time_limit; + long long lua_time_start; + int lua_random_dirty; /* True if a random command was called during the + exection of the current script. */ + int lua_timedout; /* True if we reached the time limit for script + execution. */ }; typedef struct pubsubPattern { @@ -486,20 +630,21 @@ typedef struct pubsubPattern { } pubsubPattern; typedef void redisCommandProc(redisClient *c); -typedef void redisVmPreloadProc(redisClient *c, struct redisCommand *cmd, int argc, robj **argv); +typedef int *redisGetKeysProc(struct redisCommand *cmd, robj **argv, int argc, int *numkeys, int flags); struct redisCommand { char *name; redisCommandProc *proc; int arity; - int flags; - /* Use a function to determine which keys need to be loaded - * in the background prior to executing this command. Takes precedence - * over vm_firstkey and others, ignored when NULL */ - redisVmPreloadProc *vm_preload_proc; + char *sflags; /* Flags as string represenation, one char per flag. */ + int flags; /* The actual flags, obtained from the 'sflags' field. */ + /* Use a function to determine keys arguments in a command line. + * Used for Redis Cluster redirect. */ + redisGetKeysProc *getkeys_proc; /* What keys should be loaded in background when calling this command? */ - int vm_firstkey; /* The first argument that's a key (0 = no keys) */ - int vm_lastkey; /* THe last argument that's a key */ - int vm_keystep; /* The step between first and last key */ + int firstkey; /* The first argument that's a key (0 = no keys) */ + int lastkey; /* THe last argument that's a key */ + int keystep; /* The step between first and last key */ + long long microseconds, calls; }; struct redisFunctionSym { @@ -520,40 +665,6 @@ typedef struct _redisSortOperation { robj *pattern; } redisSortOperation; -/* ZSETs use a specialized version of Skiplists */ -typedef struct zskiplistNode { - robj *obj; - double score; - struct zskiplistNode *backward; - struct zskiplistLevel { - struct zskiplistNode *forward; - unsigned int span; - } level[]; -} zskiplistNode; - -typedef struct zskiplist { - struct zskiplistNode *header, *tail; - unsigned long length; - int level; -} zskiplist; - -typedef struct zset { - dict *dict; - zskiplist *zsl; -} zset; - -/* VM threaded I/O request message */ -#define REDIS_IOJOB_LOAD 0 -#define REDIS_IOJOB_SAVE 1 - -typedef struct iojob { - int type; /* Request type, REDIS_IOJOB_* */ - redisDb *db;/* Redis database */ - robj *key; /* This I/O request is about this key */ - robj *val; /* the value to swap for REDIS_IOJOB_SAVE, otherwise this - * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */ -} iojob; - /* Structure to hold list iteration abstraction. */ typedef struct { robj *subject; @@ -603,6 +714,8 @@ extern struct redisServer server; extern struct sharedObjectsStruct shared; extern dictType setDictType; extern dictType zsetDictType; +extern dictType clusterNodesDictType; +extern dictType dbDictType; extern double R_Zero, R_PosInf, R_NegInf, R_Nan; dictType hashDictType; @@ -610,13 +723,16 @@ dictType hashDictType; * Functions prototypes *----------------------------------------------------------------------------*/ +/* Utils */ +long long ustime(void); +long long mstime(void); + /* networking.c -- Networking and Client related operations */ redisClient *createClient(int fd); void closeTimedoutClients(void); void freeClient(redisClient *c); void resetClient(redisClient *c); void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask); -void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask); void addReply(redisClient *c, robj *obj); void *addDeferredMultiBulkLength(redisClient *c); void setDeferredMultiBulkLength(redisClient *c, void *node, long length); @@ -638,6 +754,10 @@ void addReplyDouble(redisClient *c, double d); void addReplyLongLong(redisClient *c, long long ll); void addReplyMultiBulkLen(redisClient *c, long length); void *dupClientReplyValue(void *o); +void getClientsMaxBuffers(unsigned long *longest_output_list, + unsigned long *biggest_input_buffer); +void rewriteClientCommandVector(redisClient *c, int argc, ...); +void rewriteClientCommandArgument(redisClient *c, int i, robj *newval); #ifdef __GNUC__ void addReplyErrorFormat(redisClient *c, const char *fmt, ...) @@ -670,13 +790,14 @@ void popGenericCommand(redisClient *c, int where); void unwatchAllKeys(redisClient *c); void initClientMultiState(redisClient *c); void freeClientMultiState(redisClient *c); -void queueMultiCommand(redisClient *c, struct redisCommand *cmd); +void queueMultiCommand(redisClient *c); void touchWatchedKey(redisDb *db, robj *key); void touchWatchedKeysOnFlush(int dbid); /* Redis object implementation */ void decrRefCount(void *o); void incrRefCount(robj *o); +robj *resetRefCount(robj *obj); void freeStringObject(robj *o); void freeListObject(robj *o); void freeSetObject(robj *o); @@ -685,21 +806,26 @@ void freeHashObject(robj *o); robj *createObject(int type, void *ptr); robj *createStringObject(char *ptr, size_t len); robj *dupStringObject(robj *o); +int isObjectRepresentableAsLongLong(robj *o, long long *llongval); robj *tryObjectEncoding(robj *o); robj *getDecodedObject(robj *o); size_t stringObjectLen(robj *o); robj *createStringObjectFromLongLong(long long value); +robj *createStringObjectFromLongDouble(long double value); robj *createListObject(void); robj *createZiplistObject(void); robj *createSetObject(void); robj *createIntsetObject(void); robj *createHashObject(void); robj *createZsetObject(void); +robj *createZsetZiplistObject(void); int getLongFromObjectOrReply(redisClient *c, robj *o, long *target, const char *msg); int checkType(redisClient *c, robj *o, int type); int getLongLongFromObjectOrReply(redisClient *c, robj *o, long long *target, const char *msg); int getDoubleFromObjectOrReply(redisClient *c, robj *o, double *target, const char *msg); int getLongLongFromObject(robj *o, long long *target); +int getLongDoubleFromObject(robj *o, long double *target); +int getLongDoubleFromObjectOrReply(redisClient *c, robj *o, long double *target, const char *msg); char *strEncoding(int encoding); int compareStringObjects(robj *a, robj *b); int equalStringObjects(robj *a, robj *b); @@ -709,15 +835,10 @@ unsigned long estimateObjectIdleTime(robj *o); int syncWrite(int fd, char *ptr, ssize_t size, int timeout); int syncRead(int fd, char *ptr, ssize_t size, int timeout); int syncReadLine(int fd, char *ptr, ssize_t size, int timeout); -int fwriteBulkString(FILE *fp, char *s, unsigned long len); -int fwriteBulkDouble(FILE *fp, double d); -int fwriteBulkLongLong(FILE *fp, long long l); -int fwriteBulkObject(FILE *fp, robj *obj); /* Replication */ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc); void replicationFeedMonitors(list *monitors, int dictid, robj **argv, int argc); -int syncWithMaster(void); void updateSlavesWaitingBgsave(int bgsaveerr); void replicationCron(void); @@ -727,80 +848,54 @@ void loadingProgress(off_t pos); void stopLoading(void); /* RDB persistence */ -int rdbLoad(char *filename); -int rdbSaveBackground(char *filename); -void rdbRemoveTempFile(pid_t childpid); -int rdbSave(char *filename); -int rdbSaveObject(FILE *fp, robj *o); -off_t rdbSavedObjectLen(robj *o); -off_t rdbSavedObjectPages(robj *o); -robj *rdbLoadObject(int type, FILE *fp); -void backgroundSaveDoneHandler(int statloc); +#include "rdb.h" /* AOF persistence */ -void flushAppendOnlyFile(void); +void flushAppendOnlyFile(int force); void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc); void aofRemoveTempFile(pid_t childpid); int rewriteAppendOnlyFileBackground(void); int loadAppendOnlyFile(char *filename); void stopAppendOnly(void); int startAppendOnly(void); -void backgroundRewriteDoneHandler(int statloc); +void backgroundRewriteDoneHandler(int exitcode, int bysignal); /* Sorted sets data type */ + +/* Struct to hold a inclusive/exclusive range spec. */ +typedef struct { + double min, max; + int minex, maxex; /* are min or max exclusive? */ +} zrangespec; + zskiplist *zslCreate(void); void zslFree(zskiplist *zsl); zskiplistNode *zslInsert(zskiplist *zsl, double score, robj *obj); +unsigned char *zzlInsert(unsigned char *zl, robj *ele, double score); +int zslDelete(zskiplist *zsl, double score, robj *obj); +zskiplistNode *zslFirstInRange(zskiplist *zsl, zrangespec range); +double zzlGetScore(unsigned char *sptr); +void zzlNext(unsigned char *zl, unsigned char **eptr, unsigned char **sptr); +void zzlPrev(unsigned char *zl, unsigned char **eptr, unsigned char **sptr); +unsigned int zsetLength(robj *zobj); +void zsetConvert(robj *zobj, int encoding); /* Core functions */ void freeMemoryIfNeeded(void); int processCommand(redisClient *c); -void setupSigSegvAction(void); +void setupSignalHandlers(void); struct redisCommand *lookupCommand(sds name); struct redisCommand *lookupCommandByCString(char *s); -void call(redisClient *c, struct redisCommand *cmd); +void call(redisClient *c); int prepareForShutdown(); void redisLog(int level, const char *fmt, ...); +void redisLogRaw(int level, const char *msg); void usage(); void updateDictResizePolicy(void); int htNeedsResize(dict *dict); void oom(const char *msg); void populateCommandTable(void); - -/* Disk store */ -int dsOpen(void); -int dsClose(void); -int dsSet(redisDb *db, robj *key, robj *val); -robj *dsGet(redisDb *db, robj *key); -int dsDel(redisDb *db, robj *key); -int dsExists(redisDb *db, robj *key); - -/* Disk Store Cache */ -void vmInit(void); -void vmMarkPagesFree(off_t page, off_t count); -robj *vmLoadObject(robj *o); -robj *vmPreviewObject(robj *o); -int vmSwapOneObjectBlocking(void); -int vmSwapOneObjectThreaded(void); -int vmCanSwapOut(void); -void vmThreadedIOCompletedJob(aeEventLoop *el, int fd, void *privdata, int mask); -void vmCancelThreadedIOJob(robj *o); -void lockThreadedIO(void); -void unlockThreadedIO(void); -int vmSwapObjectThreaded(robj *key, robj *val, redisDb *db); -void freeIOJob(iojob *j); -void queueIOJob(iojob *j); -int vmWriteObjectOnSwap(robj *o, off_t page); -robj *vmReadObjectFromSwap(off_t page, int type); -void waitEmptyIOJobsQueue(void); -void vmReopenSwapFile(void); -int vmFreePage(off_t page); -void zunionInterBlockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv); -void execBlockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv); -int blockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd); -int dontWaitForSwappedKey(redisClient *c, robj *key); -void handleClientsBlockedOnSwappedKey(redisDb *db, robj *key); -vmpointer *vmSwapObjectBlocking(robj *val); +void resetCommandTableStats(void); /* Set data type */ robj *setTypeCreate(robj *value); @@ -837,16 +932,7 @@ int pubsubUnsubscribeAllChannels(redisClient *c, int notify); int pubsubUnsubscribeAllPatterns(redisClient *c, int notify); void freePubsubPattern(void *p); int listMatchPubsubPattern(void *a, void *b); - -/* Utility functions */ -int stringmatchlen(const char *pattern, int patternLen, - const char *string, int stringLen, int nocase); -int stringmatch(const char *pattern, const char *string, int nocase); -long long memtoll(const char *p, int *err); -int ll2string(char *s, size_t len, long long value); -int isStringRepresentableAsLong(sds s, long *longval); -int isStringRepresentableAsLongLong(sds s, long long *longval); -int isObjectRepresentableAsLongLong(robj *o, long long *llongval); +int pubsubPublishMessage(robj *channel, robj *message); /* Configuration */ void loadServerConfig(char *filename); @@ -857,20 +943,46 @@ void resetServerSaveParams(); int removeExpire(redisDb *db, robj *key); void propagateExpire(redisDb *db, robj *key); int expireIfNeeded(redisDb *db, robj *key); -time_t getExpire(redisDb *db, robj *key); -void setExpire(redisDb *db, robj *key, time_t when); +long long getExpire(redisDb *db, robj *key); +void setExpire(redisDb *db, robj *key, long long when); robj *lookupKey(redisDb *db, robj *key); robj *lookupKeyRead(redisDb *db, robj *key); robj *lookupKeyWrite(redisDb *db, robj *key); robj *lookupKeyReadOrReply(redisClient *c, robj *key, robj *reply); robj *lookupKeyWriteOrReply(redisClient *c, robj *key, robj *reply); -int dbAdd(redisDb *db, robj *key, robj *val); -int dbReplace(redisDb *db, robj *key, robj *val); +void dbAdd(redisDb *db, robj *key, robj *val); +void dbOverwrite(redisDb *db, robj *key, robj *val); +void setKey(redisDb *db, robj *key, robj *val); int dbExists(redisDb *db, robj *key); robj *dbRandomKey(redisDb *db); int dbDelete(redisDb *db, robj *key); long long emptyDb(); int selectDb(redisClient *c, int id); +void signalModifiedKey(redisDb *db, robj *key); +void signalFlushedDb(int dbid); +unsigned int GetKeysInSlot(unsigned int hashslot, robj **keys, unsigned int count); + +/* API to get key arguments from commands */ +#define REDIS_GETKEYS_ALL 0 +#define REDIS_GETKEYS_PRELOAD 1 +int *getKeysFromCommand(struct redisCommand *cmd, robj **argv, int argc, int *numkeys, int flags); +void getKeysFreeResult(int *result); +int *noPreloadGetKeys(struct redisCommand *cmd,robj **argv, int argc, int *numkeys, int flags); +int *renameGetKeys(struct redisCommand *cmd,robj **argv, int argc, int *numkeys, int flags); +int *zunionInterGetKeys(struct redisCommand *cmd,robj **argv, int argc, int *numkeys, int flags); + +/* Cluster */ +void clusterInit(void); +unsigned short crc16(const char *buf, int len); +unsigned int keyHashSlot(char *key, int keylen); +clusterNode *createClusterNode(char *nodename, int flags); +int clusterAddNode(clusterNode *node); +void clusterCron(void); +clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, int *ask); +void clusterPropagatePublish(robj *channel, robj *message); + +/* Scripting */ +void scriptingInit(void); /* Git SHA1 */ char *redisGitSHA1(void); @@ -883,6 +995,7 @@ void echoCommand(redisClient *c); void setCommand(redisClient *c); void setnxCommand(redisClient *c); void setexCommand(redisClient *c); +void psetexCommand(redisClient *c); void getCommand(redisClient *c); void delCommand(redisClient *c); void existsCommand(redisClient *c); @@ -894,6 +1007,7 @@ void incrCommand(redisClient *c); void decrCommand(redisClient *c); void incrbyCommand(redisClient *c); void decrbyCommand(redisClient *c); +void incrbyfloatCommand(redisClient *c); void selectCommand(redisClient *c); void randomkeyCommand(redisClient *c); void keysCommand(redisClient *c); @@ -943,8 +1057,11 @@ void mgetCommand(redisClient *c); void monitorCommand(redisClient *c); void expireCommand(redisClient *c); void expireatCommand(redisClient *c); +void pexpireCommand(redisClient *c); +void pexpireatCommand(redisClient *c); void getsetCommand(redisClient *c); void ttlCommand(redisClient *c); +void pttlCommand(redisClient *c); void persistCommand(redisClient *c); void slaveofCommand(redisClient *c); void debugCommand(redisClient *c); @@ -994,6 +1111,16 @@ void punsubscribeCommand(redisClient *c); void publishCommand(redisClient *c); void watchCommand(redisClient *c); void unwatchCommand(redisClient *c); +void clusterCommand(redisClient *c); +void restoreCommand(redisClient *c); +void migrateCommand(redisClient *c); +void askingCommand(redisClient *c); +void dumpCommand(redisClient *c); +void objectCommand(redisClient *c); +void clientCommand(redisClient *c); +void evalCommand(redisClient *c); +void evalShaCommand(redisClient *c); +void scriptCommand(redisClient *c); #if defined(__GNUC__) void *calloc(size_t count, size_t size) __attribute__ ((deprecated)); @@ -1002,4 +1129,9 @@ void *malloc(size_t size) __attribute__ ((deprecated)); void *realloc(void *ptr, size_t size) __attribute__ ((deprecated)); #endif +/* Debugging stuff */ +void _redisAssertWithInfo(redisClient *c, robj *o, char *estr, char *file, int line); +void _redisAssert(char *estr, char *file, int line); +void _redisPanic(char *msg, char *file, int line); + #endif