#include <unistd.h>
#include <errno.h>
#include <inttypes.h>
+#include <pthread.h>
#include "ae.h" /* Event driven programming library */
#include "sds.h" /* Dynamic safe strings */
#include "anet.h" /* Networking the easy way */
#include "zipmap.h" /* Compact string -> string data structure */
#include "ziplist.h" /* Compact list data structure */
+#include "intset.h" /* Compact integer set structure */
#include "version.h"
/* Error codes */
#define REDIS_STATIC_ARGS 8
#define REDIS_DEFAULT_DBNUM 16
#define REDIS_CONFIGLINE_MAX 1024
-#define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
#define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
#define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* lookup 10 expires per loop */
#define REDIS_MAX_WRITE_PER_EVENT (1024*64)
#define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
#define REDIS_SHARED_INTEGERS 10000
+#define REDIS_REPLY_CHUNK_BYTES (5*1500) /* 5 TCP packets with default MTU */
/* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
#define REDIS_WRITEV_THRESHOLD 3
/* Hash table parameters */
#define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
-/* Command flags */
-#define REDIS_CMD_BULK 1 /* Bulk write command */
-#define REDIS_CMD_INLINE 2 /* Inline command */
-/* REDIS_CMD_DENYOOM reserves a longer comment: all the commands marked with
- this flags will return an error when the 'maxmemory' option is set in the
- config file and the server is using more than maxmemory bytes of memory.
- In short this commands are denied on low memory conditions. */
-#define REDIS_CMD_DENYOOM 4
-#define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */
+/* Command flags:
+ * REDIS_CMD_DENYOOM:
+ * Commands marked with this flag will return an error when 'maxmemory' is
+ * set and the server is using more than 'maxmemory' bytes of memory.
+ * In short: commands with this flag are denied on low memory conditions.
+ * REDIS_CMD_FORCE_REPLICATION:
+ * Force replication even if dirty is 0. */
+#define REDIS_CMD_DENYOOM 4
+#define REDIS_CMD_FORCE_REPLICATION 8
/* Object types */
#define REDIS_STRING 0
#define REDIS_ENCODING_ZIPMAP 3 /* Encoded as zipmap */
#define REDIS_ENCODING_LINKEDLIST 4 /* Encoded as regular linked list */
#define REDIS_ENCODING_ZIPLIST 5 /* Encoded as ziplist */
+#define REDIS_ENCODING_INTSET 6 /* Encoded as intset */
/* Object types only used for dumping to disk */
#define REDIS_EXPIRETIME 253
#define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
#define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
#define REDIS_DIRTY_CAS 64 /* Watched keys modified. EXEC will fail. */
+#define REDIS_CLOSE_AFTER_REPLY 128 /* Close after writing entire reply. */
+
+/* Client request types */
+#define REDIS_REQ_INLINE 1
+#define REDIS_REQ_MULTIBULK 2
/* Slave replication state - slave side */
#define REDIS_REPL_NONE 0 /* No active replication */
#define REDIS_REPL_CONNECT 1 /* Must connect to master */
-#define REDIS_REPL_CONNECTED 2 /* Connected to master */
+#define REDIS_REPL_TRANSFER 2 /* Receiving .rdb from master */
+#define REDIS_REPL_CONNECTED 3 /* Connected to master */
/* Slave replication state - from the point of view of master
* Note that in SEND_BULK and ONLINE state the slave receives new updates
#define REDIS_HASH_MAX_ZIPMAP_VALUE 512
#define REDIS_LIST_MAX_ZIPLIST_ENTRIES 1024
#define REDIS_LIST_MAX_ZIPLIST_VALUE 32
+#define REDIS_SET_MAX_INTSET_ENTRIES 4096
/* Sets operations codes */
#define REDIS_OP_UNION 0
#define REDIS_OP_DIFF 1
#define REDIS_OP_INTER 2
+/* Redis maxmemory strategies */
+#define REDIS_MAXMEMORY_VOLATILE_LRU 0
+#define REDIS_MAXMEMORY_VOLATILE_TTL 1
+#define REDIS_MAXMEMORY_VOLATILE_RANDOM 2
+#define REDIS_MAXMEMORY_ALLKEYS_LRU 3
+#define REDIS_MAXMEMORY_ALLKEYS_RANDOM 4
+#define REDIS_MAXMEMORY_NO_EVICTION 5
+
/* We can print the stacktrace, so our assert is defined this way.
 * Each macro expands to one parenthesized expression, so it stays a single
 * unit when used as an operand (e.g. as an arm of ?:).
 * redisAssert passes the stringified expression, __FILE__ and __LINE__ to
 * _redisAssert() and calls _exit(1) only when _e is false; redisPanic does
 * the same through _redisPanic() unconditionally. */
#define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
#define redisPanic(_e) (_redisPanic(#_e,__FILE__,__LINE__),_exit(1))
/* A redis object, that is a type able to hold a string / list / set */
/* The actual Redis Object */
+#define REDIS_LRU_CLOCK_MAX ((1<<21)-1) /* Max value of obj->lru */
+#define REDIS_LRU_CLOCK_RESOLUTION 10 /* LRU clock resolution in seconds */
typedef struct redisObject {
unsigned type:4;
unsigned storage:2; /* REDIS_VM_MEMORY or REDIS_VM_SWAPPING */
redisDb *db;
int dictid;
sds querybuf;
- robj **argv, **mbargv;
- int argc, mbargc;
- int bulklen; /* bulk read len. -1 if not in bulk read mode */
- int multibulk; /* multi bulk command format active */
+ int argc;
+ robj **argv;
+ int reqtype;
+ int multibulklen; /* number of multi bulk arguments left to read */
+ long bulklen; /* length of bulk argument in multi bulk request */
list *reply;
int sentlen;
time_t lastinteraction; /* time of the last interaction, used for timeout */
list *watched_keys; /* Keys WATCHED for MULTI/EXEC CAS */
dict *pubsub_channels; /* channels a client is interested in (SUBSCRIBE) */
list *pubsub_patterns; /* patterns a client is interested in (SUBSCRIBE) */
+
+ /* Response buffer */
+ int bufpos;
+ char buf[REDIS_REPLY_CHUNK_BYTES];
} redisClient;
struct saveparam {
robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *cnegone, *pong, *space,
*colon, *nullbulk, *nullmultibulk, *queued,
*emptymultibulk, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr,
- *outofrangeerr, *plus,
+ *outofrangeerr, *loadingerr, *plus,
*select0, *select1, *select2, *select3, *select4,
*select5, *select6, *select7, *select8, *select9,
*messagebulk, *pmessagebulk, *subscribebulk, *unsubscribebulk, *mbulk3,
/* Global server state structure */
struct redisServer {
+ pthread_t mainthread;
int port;
- int fd;
+ char *bindaddr;
+ char *unixsocket;
+ int ipfd;
+ int sofd;
redisDb *db;
long long dirty; /* changes to DB from the last save */
+ long long dirty_before_bgsave; /* used to restore dirty on failed BGSAVE */
list *clients;
+ dict *commands; /* Command table hash table */
+ /* RDB / AOF loading information */
+ int loading;
+ off_t loading_total_bytes;
+ off_t loading_loaded_bytes;
+ time_t loading_start_time;
+ /* Fast pointers to often looked up commands */
+ struct redisCommand *delCommand, *multiCommand;
list *slaves, *monitors;
char neterr[ANET_ERR_LEN];
aeEventLoop *el;
int cronloops; /* number of times the cron function run */
- list *objfreelist; /* A list of freed objects to avoid malloc() */
- time_t lastsave; /* Unix time of last save succeeede */
+ time_t lastsave; /* Unix time of last successful save */
/* Fields used only for stats */
- time_t stat_starttime; /* server start time */
- long long stat_numcommands; /* number of processed commands */
- long long stat_numconnections; /* number of connections received */
- long long stat_expiredkeys; /* number of expired keys */
+ time_t stat_starttime; /* server start time */
+ long long stat_numcommands; /* number of processed commands */
+ long long stat_numconnections; /* number of connections received */
+ long long stat_expiredkeys; /* number of expired keys */
+ long long stat_keyspace_hits; /* number of successful lookups of keys */
+ long long stat_keyspace_misses; /* number of failed lookups of keys */
/* Configuration */
int verbosity;
int glueoutputbuf;
struct saveparam *saveparams;
int saveparamslen;
char *logfile;
- char *bindaddr;
char *dbfilename;
char *appendfilename;
char *requirepass;
int activerehashing;
/* Replication related */
int isslave;
+ /* Slave specific fields */
char *masterauth;
char *masterhost;
int masterport;
redisClient *master; /* client that is master for this slave */
- int replstate;
+ int replstate; /* replication status if the instance is a slave */
+ off_t repl_transfer_left; /* bytes left reading .rdb */
+ int repl_transfer_s; /* slave -> master SYNC socket */
+ int repl_transfer_fd; /* slave -> master SYNC temp file descriptor */
+ char *repl_transfer_tmpfile; /* slave-> master SYNC temp file name */
+ time_t repl_transfer_lastio; /* unix time of the latest read, for timeout */
+ int repl_serve_stale_data; /* Serve stale data when link is down? */
+ /* Limits */
unsigned int maxclients;
unsigned long long maxmemory;
+ int maxmemory_policy;
+ int maxmemory_samples;
+ /* Blocked clients */
unsigned int blpop_blocked_clients;
unsigned int vm_blocked_clients;
/* Sort parameters - qsort_r() is only available under BSD so we
size_t hash_max_zipmap_value;
size_t list_max_ziplist_entries;
size_t list_max_ziplist_value;
+ size_t set_max_intset_entries;
/* Virtual memory state */
FILE *vm_fp;
int vm_fd;
list *io_processed; /* List of VM I/O jobs already processed */
list *io_ready_clients; /* Clients ready to be unblocked. All keys loaded */
pthread_mutex_t io_mutex; /* lock to access io_jobs/io_done/io_thread_job */
- pthread_mutex_t obj_freelist_mutex; /* safe redis objects creation/free */
pthread_mutex_t io_swapfile_mutex; /* So we can lseek + write */
pthread_attr_t io_threads_attr; /* attributes for threads creation */
int io_active_threads; /* Number of running I/O threads */
dict *pubsub_channels; /* Map channels to list of subscribed clients */
list *pubsub_patterns; /* A list of pubsub_patterns */
/* Misc */
- FILE *devnull;
unsigned lruclock:22; /* clock incrementing every minute, for LRU */
unsigned lruclock_padding:10;
};
} redisSortOperation;
/* ZSETs use a specialized version of Skiplists */
-
typedef struct zskiplistNode {
- struct zskiplistNode **forward;
- struct zskiplistNode *backward;
- unsigned int *span;
- double score;
robj *obj;
+ double score;
+ struct zskiplistNode *backward;
+ struct zskiplistLevel { /* one forward pointer plus one span value per element; replaces the separate forward/span pointer members */
+ struct zskiplistNode *forward;
+ unsigned int span;
+ } level[]; /* C99 flexible array member: the element count is not fixed by this declaration, so the allocator of the node must reserve the storage (allocation site not visible here) */
} zskiplistNode;
typedef struct zskiplist {
listNode *ln; /* Entry in linked list */
} listTypeEntry;
+/* Structure to hold set iteration abstraction. It carries both an integer position (ii) and a dictIterator pointer (di). */
+typedef struct {
+ robj *subject; /* NOTE(review): presumably the set object handed to setTypeInitIterator() - confirm in the implementation */
+ int encoding; /* NOTE(review): presumably a REDIS_ENCODING_* value describing subject - confirm */
+ int ii; /* intset iterator */
+ dictIterator *di; /* dict iterator; NOTE(review): presumably used instead of ii when the set is not intset-encoded - confirm */
+} setTypeIterator;
+
/* Structure to hold hash iteration abstraction. Note that iteration over
* hashes involves both fields and values. Because it is possible that
* not both are required, store pointers in the iterator to avoid
void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask);
void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask);
void addReply(redisClient *c, robj *obj);
+void *addDeferredMultiBulkLength(redisClient *c);
+void setDeferredMultiBulkLength(redisClient *c, void *node, long length);
void addReplySds(redisClient *c, sds s);
void processInputBuffer(redisClient *c);
-void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask);
+void acceptTcpHandler(aeEventLoop *el, int fd, void *privdata, int mask);
+void acceptUnixHandler(aeEventLoop *el, int fd, void *privdata, int mask);
void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask);
void addReplyBulk(redisClient *c, robj *obj);
void addReplyBulkCString(redisClient *c, char *s);
void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask);
void addReply(redisClient *c, robj *obj);
void addReplySds(redisClient *c, sds s);
+void addReplyError(redisClient *c, char *err);
+void addReplyStatus(redisClient *c, char *status);
void addReplyDouble(redisClient *c, double d);
void addReplyLongLong(redisClient *c, long long ll);
-void addReplyUlong(redisClient *c, unsigned long ul);
+void addReplyMultiBulkLen(redisClient *c, long length);
void *dupClientReplyValue(void *o);
+#ifdef __GNUC__ /* GNU-compatible compilers: declare with a printf format attribute so the variadic arguments are type-checked */
+void addReplyErrorFormat(redisClient *c, const char *fmt, ...)
+ __attribute__((format(printf, 2, 3))); /* argument 2 is the format string, checked arguments start at argument 3 */
+void addReplyStatusFormat(redisClient *c, const char *fmt, ...)
+ __attribute__((format(printf, 2, 3))); /* same positions as above */
+#else /* other compilers: identical prototypes without the format attribute */
+void addReplyErrorFormat(redisClient *c, const char *fmt, ...);
+void addReplyStatusFormat(redisClient *c, const char *fmt, ...);
+#endif /* __GNUC__ */
+
/* List data type */
void listTypeTryConversion(robj *subject, robj *value);
void listTypePush(robj *subject, robj *value, int where);
robj *tryObjectEncoding(robj *o);
robj *getDecodedObject(robj *o);
size_t stringObjectLen(robj *o);
-int tryFreeOneObjectFromFreelist(void);
robj *createStringObjectFromLongLong(long long value);
robj *createListObject(void);
robj *createZiplistObject(void);
robj *createSetObject(void);
+robj *createIntsetObject(void);
robj *createHashObject(void);
robj *createZsetObject(void);
int getLongFromObjectOrReply(redisClient *c, robj *o, long *target, const char *msg);
char *strEncoding(int encoding);
int compareStringObjects(robj *a, robj *b);
int equalStringObjects(robj *a, robj *b);
+unsigned long estimateObjectIdleTime(robj *o);
+
+/* Synchronous I/O with timeout */
+int syncWrite(int fd, char *ptr, ssize_t size, int timeout);
+int syncRead(int fd, char *ptr, ssize_t size, int timeout);
+int syncReadLine(int fd, char *ptr, ssize_t size, int timeout);
+int fwriteBulkString(FILE *fp, char *s, unsigned long len);
+int fwriteBulkDouble(FILE *fp, double d);
+int fwriteBulkLongLong(FILE *fp, long long l);
+int fwriteBulkObject(FILE *fp, robj *obj);
/* Replication */
void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc);
void replicationFeedMonitors(list *monitors, int dictid, robj **argv, int argc);
int syncWithMaster(void);
void updateSlavesWaitingBgsave(int bgsaveerr);
+void replicationCron(void);
+
+/* Generic persistence functions */
+void startLoading(FILE *fp);
+void loadingProgress(off_t pos);
+void stopLoading(void);
/* RDB persistence */
int rdbLoad(char *filename);
void rdbRemoveTempFile(pid_t childpid);
int rdbSave(char *filename);
int rdbSaveObject(FILE *fp, robj *o);
-off_t rdbSavedObjectPages(robj *o, FILE *fp);
-off_t rdbSavedObjectLen(robj *o, FILE *fp);
+off_t rdbSavedObjectLen(robj *o);
+off_t rdbSavedObjectPages(robj *o);
robj *rdbLoadObject(int type, FILE *fp);
void backgroundSaveDoneHandler(int statloc);
/* Sorted sets data type */
zskiplist *zslCreate(void);
void zslFree(zskiplist *zsl);
-void zslInsert(zskiplist *zsl, double score, robj *obj);
+zskiplistNode *zslInsert(zskiplist *zsl, double score, robj *obj);
/* Core functions */
void freeMemoryIfNeeded(void);
int processCommand(redisClient *c);
void setupSigSegvAction(void);
-struct redisCommand *lookupCommand(char *name);
+struct redisCommand *lookupCommand(sds name);
+struct redisCommand *lookupCommandByCString(char *s);
void call(redisClient *c, struct redisCommand *cmd);
int prepareForShutdown(void); /* (void) makes this a real prototype taking no arguments, like the neighbouring declarations */
void redisLog(int level, const char *fmt, ...);
void updateDictResizePolicy(void);
int htNeedsResize(dict *dict);
void oom(const char *msg);
+void populateCommandTable(void);
/* Virtual Memory */
void vmInit(void);
void handleClientsBlockedOnSwappedKey(redisDb *db, robj *key);
vmpointer *vmSwapObjectBlocking(robj *val);
+/* Set data type */
+robj *setTypeCreate(robj *value);
+int setTypeAdd(robj *subject, robj *value);
+int setTypeRemove(robj *subject, robj *value);
+int setTypeIsMember(robj *subject, robj *value);
+setTypeIterator *setTypeInitIterator(robj *subject);
+void setTypeReleaseIterator(setTypeIterator *si);
+robj *setTypeNext(setTypeIterator *si);
+robj *setTypeRandomElement(robj *subject);
+unsigned long setTypeSize(robj *subject);
+void setTypeConvert(robj *subject, int enc);
+
/* Hash data type */
void convertToRealHash(robj *o);
void hashTypeTryConversion(robj *subject, robj **argv, int start, int end);
long long memtoll(const char *p, int *err);
int ll2string(char *s, size_t len, long long value);
int isStringRepresentableAsLong(sds s, long *longval);
+int isStringRepresentableAsLongLong(sds s, long long *longval);
+int isObjectRepresentableAsLongLong(robj *o, long long *llongval);
/* Configuration */
void loadServerConfig(char *filename);
/* db.c -- Keyspace access API */
int removeExpire(redisDb *db, robj *key);
+void propagateExpire(redisDb *db, robj *key);
int expireIfNeeded(redisDb *db, robj *key);
-int deleteIfVolatile(redisDb *db, robj *key);
time_t getExpire(redisDb *db, robj *key);
-int setExpire(redisDb *db, robj *key, time_t when);
+void setExpire(redisDb *db, robj *key, time_t when);
robj *lookupKey(redisDb *db, robj *key);
robj *lookupKeyRead(redisDb *db, robj *key);
robj *lookupKeyWrite(redisDb *db, robj *key);
void flushallCommand(redisClient *c);
void sortCommand(redisClient *c);
void lremCommand(redisClient *c);
-void rpoplpushcommand(redisClient *c);
+void rpoplpushCommand(redisClient *c);
void infoCommand(redisClient *c);
void mgetCommand(redisClient *c);
void monitorCommand(redisClient *c);
void expireatCommand(redisClient *c);
void getsetCommand(redisClient *c);
void ttlCommand(redisClient *c);
+void persistCommand(redisClient *c);
void slaveofCommand(redisClient *c);
void debugCommand(redisClient *c);
void msetCommand(redisClient *c);
void zincrbyCommand(redisClient *c);
void zrangeCommand(redisClient *c);
void zrangebyscoreCommand(redisClient *c);
+void zrevrangebyscoreCommand(redisClient *c);
void zcountCommand(redisClient *c);
void zrevrangeCommand(redisClient *c);
void zcardCommand(redisClient *c);
void brpopCommand(redisClient *c);
void appendCommand(redisClient *c);
void substrCommand(redisClient *c);
+void strlenCommand(redisClient *c);
void zrankCommand(redisClient *c);
void zrevrankCommand(redisClient *c);
void hsetCommand(redisClient *c);
void watchCommand(redisClient *c);
void unwatchCommand(redisClient *c);
+#if defined(__GNUC__) /* Redeclare the libc allocator functions as deprecated so any later direct use draws a compiler warning. NOTE(review): presumably meant to steer code toward the project's own allocator wrappers - confirm */
+void *calloc(size_t count, size_t size) __attribute__ ((deprecated));
+void free(void *ptr) __attribute__ ((deprecated));
+void *malloc(size_t size) __attribute__ ((deprecated));
+void *realloc(void *ptr, size_t size) __attribute__ ((deprecated));
+#endif /* defined(__GNUC__) */
+
#endif