#include <errno.h>
#include <inttypes.h>
#include <pthread.h>
+#include <syslog.h>
#include "ae.h" /* Event driven programming library */
#include "sds.h" /* Dynamic safe strings */
#define REDIS_STATIC_ARGS 8
#define REDIS_DEFAULT_DBNUM 16
#define REDIS_CONFIGLINE_MAX 1024
-#define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
#define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
#define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* lookup 10 expires per loop */
#define REDIS_MAX_WRITE_PER_EVENT (1024*64)
#define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
#define REDIS_SHARED_INTEGERS 10000
#define REDIS_REPLY_CHUNK_BYTES (5*1500) /* 5 TCP packets with default MTU */
+#define REDIS_MAX_LOGMSG_LEN 1024 /* Default maximum length of syslog messages */
/* If more then REDIS_WRITEV_THRESHOLD write packets are pending use writev */
#define REDIS_WRITEV_THRESHOLD 3
#define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
#define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
-/* Virtual memory object->where field. */
-#define REDIS_VM_MEMORY 0 /* The object is on memory */
-#define REDIS_VM_SWAPPED 1 /* The object is on disk */
-#define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
-#define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
-
-/* Virtual memory static configuration stuff.
- * Check vmFindContiguousPages() to know more about this magic numbers. */
-#define REDIS_VM_MAX_NEAR_PAGES 65536
-#define REDIS_VM_MAX_RANDOM_JUMP 4096
-#define REDIS_VM_MAX_THREADS 32
-#define REDIS_THREAD_STACK_SIZE (1024*1024*4)
-/* The following is the *percentage* of completed I/O jobs to process when the
- * handelr is called. While Virtual Memory I/O operations are performed by
- * threads, this operations must be processed by the main thread when completed
- * in order to take effect. */
+/* Scheduled IO operations flags. */
+#define REDIS_IO_LOAD 1
+#define REDIS_IO_SAVE 2
+#define REDIS_IO_LOADINPROG 4
+#define REDIS_IO_SAVEINPROG 8
+
#define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
+#define REDIS_THREAD_STACK_SIZE (1024*1024*4)
/* Client flags */
#define REDIS_SLAVE 1 /* This client is a slave server */
/* Slave replication state - slave side */
#define REDIS_REPL_NONE 0 /* No active replication */
#define REDIS_REPL_CONNECT 1 /* Must connect to master */
-#define REDIS_REPL_CONNECTED 2 /* Connected to master */
+#define REDIS_REPL_TRANSFER 2 /* Receiving .rdb from master */
+#define REDIS_REPL_CONNECTED 3 /* Connected to master */
/* Slave replication state - from the point of view of master
* Note that in SEND_BULK and ONLINE state the slave receives new updates
/* Zip structure related defaults */
#define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
#define REDIS_HASH_MAX_ZIPMAP_VALUE 512
-#define REDIS_LIST_MAX_ZIPLIST_ENTRIES 1024
-#define REDIS_LIST_MAX_ZIPLIST_VALUE 32
-#define REDIS_SET_MAX_INTSET_ENTRIES 4096
+#define REDIS_LIST_MAX_ZIPLIST_ENTRIES 512
+#define REDIS_LIST_MAX_ZIPLIST_VALUE 64
+#define REDIS_SET_MAX_INTSET_ENTRIES 512
/* Sets operations codes */
#define REDIS_OP_UNION 0
#define REDIS_MAXMEMORY_VOLATILE_RANDOM 2
#define REDIS_MAXMEMORY_ALLKEYS_LRU 3
#define REDIS_MAXMEMORY_ALLKEYS_RANDOM 4
+#define REDIS_MAXMEMORY_NO_EVICTION 5
/* We can print the stacktrace, so our assert is defined this way: */
#define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
#define REDIS_LRU_CLOCK_RESOLUTION 10 /* LRU clock resolution in seconds */
typedef struct redisObject {
unsigned type:4;
- unsigned storage:2; /* REDIS_VM_MEMORY or REDIS_VM_SWAPPING */
+ unsigned notused:2; /* Not used */
unsigned encoding:4;
unsigned lru:22; /* lru time (relative to server.lruclock) */
int refcount;
_var.type = REDIS_STRING; \
_var.encoding = REDIS_ENCODING_RAW; \
_var.ptr = _ptr; \
- _var.storage = REDIS_VM_MEMORY; \
} while(0);
typedef struct redisDb {
dict *dict; /* The keyspace for this DB */
dict *expires; /* Timeout of keys with a timeout set */
dict *blocking_keys; /* Keys with clients waiting for data (BLPOP) */
- dict *io_keys; /* Keys with clients waiting for VM I/O */
+ dict *io_keys; /* Keys with clients waiting for disk store (DS) I/O */
+ dict *io_negcache; /* Negative caching for disk store */
+ dict *io_queued; /* Queued IO operations hash table */
dict *watched_keys; /* WATCHED keys for MULTI/EXEC CAS */
int id;
} redisDb;
int count; /* Total number of MULTI commands */
} multiState;
+typedef struct blockingState { /* Blocking state embedded in redisClient (bpop) */
+ robj **keys; /* The keys we are waiting on to terminate a blocking
+ * operation such as BLPOP. Otherwise NULL. */
+ int count; /* Number of blocking keys */
+ time_t timeout; /* Blocking operation timeout. If UNIX current time
+ * is >= timeout then the operation timed out. */
+ robj *target; /* The key that should receive the element,
+ * for BRPOPLPUSH. */
+} blockingState;
+
/* With multiplexing we need to take per-clinet state.
* Clients are taken in a liked list. */
typedef struct redisClient {
long repldboff; /* replication DB file offset */
off_t repldbsize; /* replication DB file size */
multiState mstate; /* MULTI/EXEC state */
- robj **blocking_keys; /* The key we are waiting to terminate a blocking
- * operation such as BLPOP. Otherwise NULL. */
- int blocking_keys_num; /* Number of blocking keys */
- time_t blockingto; /* Blocking operation timeout. If UNIX current time
- * is >= blockingto then the operation timed out. */
+ blockingState bpop; /* blocking state */
list *io_keys; /* Keys this client is waiting to be loaded from the
* swap file in order to continue. */
list *watched_keys; /* Keys WATCHED for MULTI/EXEC CAS */
robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *cnegone, *pong, *space,
*colon, *nullbulk, *nullmultibulk, *queued,
*emptymultibulk, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr,
- *outofrangeerr, *plus,
+ *outofrangeerr, *loadingerr, *plus,
*select0, *select1, *select2, *select3, *select4,
*select5, *select6, *select7, *select8, *select9,
*messagebulk, *pmessagebulk, *subscribebulk, *unsubscribebulk, *mbulk3,
struct redisServer {
pthread_t mainthread;
int port;
- int fd;
+ char *bindaddr;
+ char *unixsocket;
+ int ipfd;
+ int sofd;
redisDb *db;
long long dirty; /* changes to DB from the last save */
long long dirty_before_bgsave; /* used to restore dirty on failed BGSAVE */
list *clients;
+ dict *commands; /* Command table hash table */
+ /* RDB / AOF loading information */
+ int loading;
+ off_t loading_total_bytes;
+ off_t loading_loaded_bytes;
+ time_t loading_start_time;
+ /* Fast pointers to often looked up command */
+ struct redisCommand *delCommand, *multiCommand;
list *slaves, *monitors;
char neterr[ANET_ERR_LEN];
aeEventLoop *el;
int cronloops; /* number of times the cron function run */
- list *objfreelist; /* A list of freed objects to avoid malloc() */
time_t lastsave; /* Unix time of last save succeeede */
/* Fields used only for stats */
time_t stat_starttime; /* server start time */
long long stat_numcommands; /* number of processed commands */
long long stat_numconnections; /* number of connections received */
long long stat_expiredkeys; /* number of expired keys */
+ long long stat_evictedkeys; /* number of evicted keys (maxmemory) */
long long stat_keyspace_hits; /* number of successful lookups of keys */
long long stat_keyspace_misses; /* number of failed lookups of keys */
/* Configuration */
struct saveparam *saveparams;
int saveparamslen;
char *logfile;
- char *bindaddr;
+ int syslog_enabled;
+ char *syslog_ident;
+ int syslog_facility;
char *dbfilename;
char *appendfilename;
char *requirepass;
int rdbcompression;
int activerehashing;
- /* Memory related */
- float fragmentation;
/* Replication related */
int isslave;
+ /* Slave specific fields */
char *masterauth;
char *masterhost;
int masterport;
redisClient *master; /* client that is master for this slave */
- int replstate;
+ int replstate; /* replication status if the instance is a slave */
+ off_t repl_transfer_left; /* bytes left reading .rdb */
+ int repl_transfer_s; /* slave -> master SYNC socket */
+ int repl_transfer_fd; /* slave -> master SYNC temp file descriptor */
+ char *repl_transfer_tmpfile; /* slave-> master SYNC temp file name */
+ time_t repl_transfer_lastio; /* unix time of the latest read, for timeout */
+ int repl_serve_stale_data; /* Serve stale data when link is down? */
+ /* Limits */
unsigned int maxclients;
unsigned long long maxmemory;
int maxmemory_policy;
int maxmemory_samples;
- unsigned int blpop_blocked_clients;
- unsigned int vm_blocked_clients;
+ /* Blocked clients */
+ unsigned int bpop_blocked_clients;
+ unsigned int cache_blocked_clients;
+ list *unblocked_clients; /* list of clients to unblock before next loop */
+ list *cache_io_queue; /* IO operations queue */
+ int cache_flush_delay; /* seconds to wait before flushing keys */
/* Sort parameters - qsort_r() is only available under BSD so we
* have to take this state global, in order to pass it to sortCompare() */
int sort_desc;
int sort_alpha;
int sort_bypattern;
/* Virtual memory configuration */
- int vm_enabled;
- char *vm_swap_file;
- off_t vm_page_size;
- off_t vm_pages;
- unsigned long long vm_max_memory;
+ int ds_enabled; /* backend disk in redis.conf */
+ char *ds_path; /* location of the disk store on disk */
+ unsigned long long cache_max_memory;
/* Zip structure config */
size_t hash_max_zipmap_entries;
size_t hash_max_zipmap_value;
size_t list_max_ziplist_entries;
size_t list_max_ziplist_value;
size_t set_max_intset_entries;
- /* Virtual memory state */
- FILE *vm_fp;
- int vm_fd;
- off_t vm_next_page; /* Next probably empty page */
- off_t vm_near_pages; /* Number of pages allocated sequentially */
- unsigned char *vm_bitmap; /* Bitmap of free/used pages */
time_t unixtime; /* Unix time sampled every second. */
/* Virtual memory I/O threads stuff */
/* An I/O thread process an element taken from the io_jobs queue and
list *io_processed; /* List of VM I/O jobs already processed */
list *io_ready_clients; /* Clients ready to be unblocked. All keys loaded */
pthread_mutex_t io_mutex; /* lock to access io_jobs/io_done/io_thread_job */
- pthread_mutex_t obj_freelist_mutex; /* safe redis objects creation/free */
- pthread_mutex_t io_swapfile_mutex; /* So we can lseek + write */
+ pthread_cond_t io_condvar; /* I/O threads conditional variable */
pthread_attr_t io_threads_attr; /* attributes for threads creation */
int io_active_threads; /* Number of running I/O threads */
int vm_max_threads; /* Max number of I/O threads running at the same time */
dict *pubsub_channels; /* Map channels to list of subscribed clients */
list *pubsub_patterns; /* A list of pubsub_patterns */
/* Misc */
- FILE *devnull;
unsigned lruclock:22; /* clock incrementing every minute, for LRU */
unsigned lruclock_padding:10;
};
zskiplist *zsl;
} zset;
-/* VM threaded I/O request message */
-#define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
-#define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
-#define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
+/* Disk store threaded I/O request message */
+#define REDIS_IOJOB_LOAD 0
+#define REDIS_IOJOB_SAVE 1
+
typedef struct iojob {
int type; /* Request type, REDIS_IOJOB_* */
redisDb *db;/* Redis database */
- robj *key; /* This I/O request is about swapping this key */
- robj *id; /* Unique identifier of this job:
- this is the object to swap for REDIS_IOREQ_*_SWAP, or the
- vmpointer objct for REDIS_IOREQ_LOAD. */
- robj *val; /* the value to swap for REDIS_IOREQ_*_SWAP, otherwise this
- * field is populated by the I/O thread for REDIS_IOREQ_LOAD. */
- off_t page; /* Swap page where to read/write the object */
- off_t pages; /* Swap pages needed to save object. PREPARE_SWAP return val */
- int canceled; /* True if this command was canceled by blocking side of VM */
- pthread_t thread; /* ID of the thread processing this entry */
+ robj *key; /* This I/O request is about this key */
+ robj *val; /* The value to save for REDIS_IOJOB_SAVE; otherwise this
+ * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */
+ time_t expire; /* Expire time for this key on REDIS_IOJOB_LOAD */
} iojob;
+/* IO operations scheduled -- check dscache.c for more info.
+ * NOTE(review): type/db/key presumably mirror the arguments of
+ * cacheScheduleIO(db, key, type), with type being one of the REDIS_IO_*
+ * flags -- confirm against dscache.c. */
+typedef struct ioop {
+ int type;
+ redisDb *db;
+ robj *key;
+ time_t ctime; /* This is the creation time of the entry. */
+} ioop;
+
/* Structure to hold list iteration abstraction. */
typedef struct {
robj *subject;
void setDeferredMultiBulkLength(redisClient *c, void *node, long length);
void addReplySds(redisClient *c, sds s);
void processInputBuffer(redisClient *c);
-void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask);
+void acceptTcpHandler(aeEventLoop *el, int fd, void *privdata, int mask);
+void acceptUnixHandler(aeEventLoop *el, int fd, void *privdata, int mask);
void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask);
void addReplyBulk(redisClient *c, robj *obj);
void addReplyBulkCString(redisClient *c, char *s);
+void addReplyBulkCBuffer(redisClient *c, void *p, size_t len);
+void addReplyBulkLongLong(redisClient *c, long long ll);
void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask);
void addReply(redisClient *c, robj *obj);
void addReplySds(redisClient *c, sds s);
robj *tryObjectEncoding(robj *o);
robj *getDecodedObject(robj *o);
size_t stringObjectLen(robj *o);
-int tryFreeOneObjectFromFreelist(void);
robj *createStringObjectFromLongLong(long long value);
robj *createListObject(void);
robj *createZiplistObject(void);
void replicationFeedMonitors(list *monitors, int dictid, robj **argv, int argc);
int syncWithMaster(void);
void updateSlavesWaitingBgsave(int bgsaveerr);
+void replicationCron(void);
+
+/* Generic persistence functions */
+void startLoading(FILE *fp);
+void loadingProgress(off_t pos);
+void stopLoading(void);
/* RDB persistence */
int rdbLoad(char *filename);
void rdbRemoveTempFile(pid_t childpid);
int rdbSave(char *filename);
int rdbSaveObject(FILE *fp, robj *o);
-off_t rdbSavedObjectPages(robj *o, FILE *fp);
-off_t rdbSavedObjectLen(robj *o, FILE *fp);
+off_t rdbSavedObjectLen(robj *o);
+off_t rdbSavedObjectPages(robj *o);
robj *rdbLoadObject(int type, FILE *fp);
void backgroundSaveDoneHandler(int statloc);
+int rdbSaveKeyValuePair(FILE *fp, redisDb *db, robj *key, robj *val, time_t now);
+int rdbLoadType(FILE *fp);
+time_t rdbLoadTime(FILE *fp);
+robj *rdbLoadStringObject(FILE *fp);
/* AOF persistence */
void flushAppendOnlyFile(void);
void freeMemoryIfNeeded(void);
int processCommand(redisClient *c);
void setupSigSegvAction(void);
-struct redisCommand *lookupCommand(char *name);
+struct redisCommand *lookupCommand(sds name);
+struct redisCommand *lookupCommandByCString(char *s);
void call(redisClient *c, struct redisCommand *cmd);
int prepareForShutdown();
void redisLog(int level, const char *fmt, ...);
void updateDictResizePolicy(void);
int htNeedsResize(dict *dict);
void oom(const char *msg);
-size_t redisEstimateRSS(void);
-
-/* Virtual Memory */
-void vmInit(void);
-void vmMarkPagesFree(off_t page, off_t count);
-robj *vmLoadObject(robj *o);
-robj *vmPreviewObject(robj *o);
-int vmSwapOneObjectBlocking(void);
-int vmSwapOneObjectThreaded(void);
-int vmCanSwapOut(void);
+void populateCommandTable(void);
+
+/* Disk store */
+int dsOpen(void);
+int dsClose(void);
+int dsSet(redisDb *db, robj *key, robj *val);
+robj *dsGet(redisDb *db, robj *key, time_t *expire);
+int dsDel(redisDb *db, robj *key);
+int dsExists(redisDb *db, robj *key);
+void dsFlushDb(int dbid);
+
+/* Disk Store Cache */
+void dsInit(void);
void vmThreadedIOCompletedJob(aeEventLoop *el, int fd, void *privdata, int mask);
-void vmCancelThreadedIOJob(robj *o);
void lockThreadedIO(void);
void unlockThreadedIO(void);
-int vmSwapObjectThreaded(robj *key, robj *val, redisDb *db);
void freeIOJob(iojob *j);
void queueIOJob(iojob *j);
-int vmWriteObjectOnSwap(robj *o, off_t page);
-robj *vmReadObjectFromSwap(off_t page, int type);
void waitEmptyIOJobsQueue(void);
-void vmReopenSwapFile(void);
-int vmFreePage(off_t page);
+void processAllPendingIOJobs(void);
void zunionInterBlockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv);
void execBlockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv);
int blockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd);
int dontWaitForSwappedKey(redisClient *c, robj *key);
void handleClientsBlockedOnSwappedKey(redisDb *db, robj *key);
-vmpointer *vmSwapObjectBlocking(robj *val);
+int cacheFreeOneEntry(void);
+void cacheScheduleIOAddFlag(redisDb *db, robj *key, long flag);
+void cacheScheduleIODelFlag(redisDb *db, robj *key, long flag);
+int cacheScheduleIOGetFlags(redisDb *db, robj *key);
+void cacheScheduleIO(redisDb *db, robj *key, int type);
+void cacheCron(void);
+int cacheKeyMayExist(redisDb *db, robj *key);
+void cacheSetKeyExists(redisDb *db, robj *key);
+void cacheSetKeyDoesNotExist(redisDb *db, robj *key);
/* Set data type */
robj *setTypeCreate(robj *value);
int setTypeIsMember(robj *subject, robj *value);
setTypeIterator *setTypeInitIterator(robj *subject);
void setTypeReleaseIterator(setTypeIterator *si);
-robj *setTypeNext(setTypeIterator *si);
-robj *setTypeRandomElement(robj *subject);
+int setTypeNext(setTypeIterator *si, robj **objele, int64_t *llele);
+robj *setTypeNextObject(setTypeIterator *si);
+int setTypeRandomElement(robj *setobj, robj **objele, int64_t *llele);
unsigned long setTypeSize(robj *subject);
void setTypeConvert(robj *subject, int enc);
void convertToRealHash(robj *o);
void hashTypeTryConversion(robj *subject, robj **argv, int start, int end);
void hashTypeTryObjectEncoding(robj *subject, robj **o1, robj **o2);
-robj *hashTypeGet(robj *o, robj *key);
+int hashTypeGet(robj *o, robj *key, robj **objval, unsigned char **v, unsigned int *vlen);
+robj *hashTypeGetObject(robj *o, robj *key);
int hashTypeExists(robj *o, robj *key);
int hashTypeSet(robj *o, robj *key, robj *value);
int hashTypeDelete(robj *o, robj *key);
hashTypeIterator *hashTypeInitIterator(robj *subject);
void hashTypeReleaseIterator(hashTypeIterator *hi);
int hashTypeNext(hashTypeIterator *hi);
-robj *hashTypeCurrent(hashTypeIterator *hi, int what);
+int hashTypeCurrent(hashTypeIterator *hi, int what, robj **objval, unsigned char **v, unsigned int *vlen);
+robj *hashTypeCurrentObject(hashTypeIterator *hi, int what);
robj *hashTypeLookupWriteOrCreate(redisClient *c, robj *key);
/* Pub / Sub */
int dbDelete(redisDb *db, robj *key);
long long emptyDb();
int selectDb(redisClient *c, int id);
+void signalModifiedKey(redisDb *db, robj *key);
+void signalFlushedDb(int dbid);
/* Git SHA1 */
char *redisGitSHA1(void);
void getCommand(redisClient *c);
void delCommand(redisClient *c);
void existsCommand(redisClient *c);
+void setbitCommand(redisClient *c);
+void getbitCommand(redisClient *c);
+void setrangeCommand(redisClient *c);
+void getrangeCommand(redisClient *c);
void incrCommand(redisClient *c);
void decrCommand(redisClient *c);
void incrbyCommand(redisClient *c);
void flushallCommand(redisClient *c);
void sortCommand(redisClient *c);
void lremCommand(redisClient *c);
-void rpoplpushcommand(redisClient *c);
+void rpoplpushCommand(redisClient *c);
void infoCommand(redisClient *c);
void mgetCommand(redisClient *c);
void monitorCommand(redisClient *c);
void discardCommand(redisClient *c);
void blpopCommand(redisClient *c);
void brpopCommand(redisClient *c);
+void brpoplpushCommand(redisClient *c);
void appendCommand(redisClient *c);
-void substrCommand(redisClient *c);
void strlenCommand(redisClient *c);
void zrankCommand(redisClient *c);
void zrevrankCommand(redisClient *c);