X-Git-Url: https://git.saurik.com/redis.git/blobdiff_plain/b3aa6d712e1345a57696e4d260ce49ccac253ba7..357a841714d50f3d3c293b9bb03839bac28cb05a:/src/redis.h?ds=sidebyside diff --git a/src/redis.h b/src/redis.h index 7aca2abc..0de94585 100644 --- a/src/redis.h +++ b/src/redis.h @@ -26,6 +26,7 @@ #include "anet.h" /* Networking the easy way */ #include "zipmap.h" /* Compact string -> string data structure */ #include "ziplist.h" /* Compact list data structure */ +#include "intset.h" /* Compact integer set structure */ #include "version.h" /* Error codes */ @@ -40,12 +41,12 @@ #define REDIS_STATIC_ARGS 8 #define REDIS_DEFAULT_DBNUM 16 #define REDIS_CONFIGLINE_MAX 1024 -#define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */ #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */ #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* lookup 10 expires per loop */ #define REDIS_MAX_WRITE_PER_EVENT (1024*64) #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */ #define REDIS_SHARED_INTEGERS 10000 +#define REDIS_REPLY_CHUNK_BYTES (5*1500) /* 5 TCP packets with default MTU */ /* If more then REDIS_WRITEV_THRESHOLD write packets are pending use writev */ #define REDIS_WRITEV_THRESHOLD 3 @@ -55,15 +56,15 @@ /* Hash table parameters */ #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */ -/* Command flags */ -#define REDIS_CMD_BULK 1 /* Bulk write command */ -#define REDIS_CMD_INLINE 2 /* Inline command */ -/* REDIS_CMD_DENYOOM reserves a longer comment: all the commands marked with - this flags will return an error when the 'maxmemory' option is set in the - config file and the server is using more than maxmemory bytes of memory. - In short this commands are denied on low memory conditions. */ -#define REDIS_CMD_DENYOOM 4 -#define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */ +/* Command flags: + * REDIS_CMD_DENYOOM: + * Commands marked with this flag will return an error when 'maxmemory' is + * set and the server is using more than 'maxmemory' bytes of memory. + * In short: commands with this flag are denied on low memory conditions. + * REDIS_CMD_FORCE_REPLICATION: + * Force replication even if dirty is 0. */ +#define REDIS_CMD_DENYOOM 4 +#define REDIS_CMD_FORCE_REPLICATION 8 /* Object types */ #define REDIS_STRING 0 @@ -82,6 +83,7 @@ #define REDIS_ENCODING_ZIPMAP 3 /* Encoded as zipmap */ #define REDIS_ENCODING_LINKEDLIST 4 /* Encoded as regular linked list */ #define REDIS_ENCODING_ZIPLIST 5 /* Encoded as ziplist */ +#define REDIS_ENCODING_INTSET 6 /* Encoded as intset */ /* Object types only used for dumping to disk */ #define REDIS_EXPIRETIME 253 @@ -141,11 +143,17 @@ #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */ #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */ #define REDIS_DIRTY_CAS 64 /* Watched keys modified. EXEC will fail. */ +#define REDIS_CLOSE_AFTER_REPLY 128 /* Close after writing entire reply. */ + +/* Client request types */ +#define REDIS_REQ_INLINE 1 +#define REDIS_REQ_MULTIBULK 2 /* Slave replication state - slave side */ #define REDIS_REPL_NONE 0 /* No active replication */ #define REDIS_REPL_CONNECT 1 /* Must connect to master */ -#define REDIS_REPL_CONNECTED 2 /* Connected to master */ +#define REDIS_REPL_TRANSFER 2 /* Receiving .rdb from master */ +#define REDIS_REPL_CONNECTED 3 /* Connected to master */ /* Slave replication state - from the point of view of master * Note that in SEND_BULK and ONLINE state the slave receives new updates @@ -188,12 +196,21 @@ #define REDIS_HASH_MAX_ZIPMAP_VALUE 512 #define REDIS_LIST_MAX_ZIPLIST_ENTRIES 1024 #define REDIS_LIST_MAX_ZIPLIST_VALUE 32 +#define REDIS_SET_MAX_INTSET_ENTRIES 4096 /* Sets operations codes */ #define REDIS_OP_UNION 0 #define REDIS_OP_DIFF 1 #define REDIS_OP_INTER 2 +/* Redis maxmemory strategies */ +#define REDIS_MAXMEMORY_VOLATILE_LRU 0 +#define REDIS_MAXMEMORY_VOLATILE_TTL 1 +#define REDIS_MAXMEMORY_VOLATILE_RANDOM 2 +#define REDIS_MAXMEMORY_ALLKEYS_LRU 3 +#define REDIS_MAXMEMORY_ALLKEYS_RANDOM 4 +#define REDIS_MAXMEMORY_NO_EVICTION 5 + /* We can print the stacktrace, so our assert is defined this way: */ #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1))) #define redisPanic(_e) _redisPanic(#_e,__FILE__,__LINE__),_exit(1) @@ -207,6 +224,8 @@ void _redisPanic(char *msg, char *file, int line); /* A redis object, that is a type able to hold a string / list / set */ /* The actual Redis Object */ +#define REDIS_LRU_CLOCK_MAX ((1<<21)-1) /* Max value of obj->lru */ +#define REDIS_LRU_CLOCK_RESOLUTION 10 /* LRU clock resolution in seconds */ typedef struct redisObject { unsigned type:4; unsigned storage:2; /* REDIS_VM_MEMORY or REDIS_VM_SWAPPING */ @@ -274,6 +293,16 @@ typedef struct multiState { int count; /* Total number of MULTI commands */ } multiState; +typedef struct blockingState { + robj **keys; /* The key we are waiting to terminate a blocking + * operation such as BLPOP. Otherwise NULL. */ + int count; /* Number of blocking keys */ + time_t timeout; /* Blocking operation timeout. If UNIX current time + * is >= timeout then the operation timed out. */ + robj *target; /* The key that should receive the element, + * for BRPOPLPUSH. */ +} blockingState; + /* With multiplexing we need to take per-clinet state. * Clients are taken in a liked list. */ typedef struct redisClient { @@ -281,10 +310,11 @@ typedef struct redisClient { redisDb *db; int dictid; sds querybuf; - robj **argv, **mbargv; - int argc, mbargc; - int bulklen; /* bulk read len. -1 if not in bulk read mode */ - int multibulk; /* multi bulk command format active */ + int argc; + robj **argv; + int reqtype; + int multibulklen; /* number of multi bulk arguments left to read */ + long bulklen; /* length of bulk argument in multi bulk request */ list *reply; int sentlen; time_t lastinteraction; /* time of the last interaction, used for timeout */ @@ -296,16 +326,16 @@ typedef struct redisClient { long repldboff; /* replication DB file offset */ off_t repldbsize; /* replication DB file size */ multiState mstate; /* MULTI/EXEC state */ - robj **blocking_keys; /* The key we are waiting to terminate a blocking - * operation such as BLPOP. Otherwise NULL. */ - int blocking_keys_num; /* Number of blocking keys */ - time_t blockingto; /* Blocking operation timeout. If UNIX current time - * is >= blockingto then the operation timed out. */ + blockingState bstate; /* blocking state */ list *io_keys; /* Keys this client is waiting to be loaded from the * swap file in order to continue. */ list *watched_keys; /* Keys WATCHED for MULTI/EXEC CAS */ dict *pubsub_channels; /* channels a client is interested in (SUBSCRIBE) */ list *pubsub_patterns; /* patterns a client is interested in (SUBSCRIBE) */ + + /* Response buffer */ + int bufpos; + char buf[REDIS_REPLY_CHUNK_BYTES]; } redisClient; struct saveparam { @@ -317,7 +347,7 @@ struct sharedObjectsStruct { robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *cnegone, *pong, *space, *colon, *nullbulk, *nullmultibulk, *queued, *emptymultibulk, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr, - *outofrangeerr, *plus, + *outofrangeerr, *loadingerr, *plus, *select0, *select1, *select2, *select3, *select4, *select5, *select6, *select7, *select8, *select9, *messagebulk, *pmessagebulk, *subscribebulk, *unsubscribebulk, *mbulk3, @@ -329,21 +359,34 @@ struct sharedObjectsStruct { struct redisServer { pthread_t mainthread; int port; - int fd; + char *bindaddr; + char *unixsocket; + int ipfd; + int sofd; redisDb *db; long long dirty; /* changes to DB from the last save */ + long long dirty_before_bgsave; /* used to restore dirty on failed BGSAVE */ list *clients; + dict *commands; /* Command table hahs table */ + /* RDB / AOF loading information */ + int loading; + off_t loading_total_bytes; + off_t loading_loaded_bytes; + time_t loading_start_time; + /* Fast pointers to often looked up command */ + struct redisCommand *delCommand, *multiCommand; list *slaves, *monitors; char neterr[ANET_ERR_LEN]; aeEventLoop *el; int cronloops; /* number of times the cron function run */ - list *objfreelist; /* A list of freed objects to avoid malloc() */ - time_t lastsave; /* Unix time of last save succeeede */ + time_t lastsave; /* Unix time of last save succeeede */ /* Fields used only for stats */ - time_t stat_starttime; /* server start time */ - long long stat_numcommands; /* number of processed commands */ - long long stat_numconnections; /* number of connections received */ - long long stat_expiredkeys; /* number of expired keys */ + time_t stat_starttime; /* server start time */ + long long stat_numcommands; /* number of processed commands */ + long long stat_numconnections; /* number of connections received */ + long long stat_expiredkeys; /* number of expired keys */ + long long stat_keyspace_hits; /* number of successful lookups of keys */ + long long stat_keyspace_misses; /* number of failed lookups of keys */ /* Configuration */ int verbosity; int glueoutputbuf; @@ -365,7 +408,6 @@ struct redisServer { struct saveparam *saveparams; int saveparamslen; char *logfile; - char *bindaddr; char *dbfilename; char *appendfilename; char *requirepass; @@ -373,13 +415,24 @@ struct redisServer { int activerehashing; /* Replication related */ int isslave; + /* Slave specific fields */ char *masterauth; char *masterhost; int masterport; redisClient *master; /* client that is master for this slave */ - int replstate; + int replstate; /* replication status if the instance is a slave */ + off_t repl_transfer_left; /* bytes left reading .rdb */ + int repl_transfer_s; /* slave -> master SYNC socket */ + int repl_transfer_fd; /* slave -> master SYNC temp file descriptor */ + char *repl_transfer_tmpfile; /* slave-> master SYNC temp file name */ + time_t repl_transfer_lastio; /* unix time of the latest read, for timeout */ + int repl_serve_stale_data; /* Serve stale data when link is down? */ + /* Limits */ unsigned int maxclients; unsigned long long maxmemory; + int maxmemory_policy; + int maxmemory_samples; + /* Blocked clients */ unsigned int blpop_blocked_clients; unsigned int vm_blocked_clients; /* Sort parameters - qsort_r() is only available under BSD so we @@ -398,6 +451,7 @@ struct redisServer { size_t hash_max_zipmap_value; size_t list_max_ziplist_entries; size_t list_max_ziplist_value; + size_t set_max_intset_entries; /* Virtual memory state */ FILE *vm_fp; int vm_fd; @@ -414,7 +468,6 @@ struct redisServer { list *io_processed; /* List of VM I/O jobs already processed */ list *io_ready_clients; /* Clients ready to be unblocked. All keys loaded */ pthread_mutex_t io_mutex; /* lock to access io_jobs/io_done/io_thread_job */ - pthread_mutex_t obj_freelist_mutex; /* safe redis objects creation/free */ pthread_mutex_t io_swapfile_mutex; /* So we can lseek + write */ pthread_attr_t io_threads_attr; /* attributes for threads creation */ int io_active_threads; /* Number of running I/O threads */ @@ -434,7 +487,6 @@ struct redisServer { dict *pubsub_channels; /* Map channels to list of subscribed clients */ list *pubsub_patterns; /* A list of pubsub_patterns */ /* Misc */ - FILE *devnull; unsigned lruclock:22; /* clock incrementing every minute, for LRU */ unsigned lruclock_padding:10; }; @@ -480,13 +532,14 @@ typedef struct _redisSortOperation { } redisSortOperation; /* ZSETs use a specialized version of Skiplists */ - typedef struct zskiplistNode { - struct zskiplistNode **forward; - struct zskiplistNode *backward; - unsigned int *span; - double score; robj *obj; + double score; + struct zskiplistNode *backward; + struct zskiplistLevel { + struct zskiplistNode *forward; + unsigned int span; + } level[]; } zskiplistNode; typedef struct zskiplist { @@ -535,6 +588,14 @@ typedef struct { listNode *ln; /* Entry in linked list */ } listTypeEntry; +/* Structure to hold set iteration abstraction. */ +typedef struct { + robj *subject; + int encoding; + int ii; /* intset iterator */ + dictIterator *di; +} setTypeIterator; + /* Structure to hold hash iteration abstration. Note that iteration over * hashes involves both fields and values. Because it is possible that * not both are required, store pointers in the iterator to avoid @@ -575,20 +636,35 @@ void resetClient(redisClient *c); void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask); void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask); void addReply(redisClient *c, robj *obj); +void *addDeferredMultiBulkLength(redisClient *c); +void setDeferredMultiBulkLength(redisClient *c, void *node, long length); void addReplySds(redisClient *c, sds s); void processInputBuffer(redisClient *c); -void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask); +void acceptTcpHandler(aeEventLoop *el, int fd, void *privdata, int mask); +void acceptUnixHandler(aeEventLoop *el, int fd, void *privdata, int mask); void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask); void addReplyBulk(redisClient *c, robj *obj); void addReplyBulkCString(redisClient *c, char *s); void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask); void addReply(redisClient *c, robj *obj); void addReplySds(redisClient *c, sds s); +void addReplyError(redisClient *c, char *err); +void addReplyStatus(redisClient *c, char *status); void addReplyDouble(redisClient *c, double d); void addReplyLongLong(redisClient *c, long long ll); -void addReplyUlong(redisClient *c, unsigned long ul); +void addReplyMultiBulkLen(redisClient *c, long length); void *dupClientReplyValue(void *o); +#ifdef __GNUC__ +void addReplyErrorFormat(redisClient *c, const char *fmt, ...) + __attribute__((format(printf, 2, 3))); +void addReplyStatusFormat(redisClient *c, const char *fmt, ...) + __attribute__((format(printf, 2, 3))); +#else +void addReplyErrorFormat(redisClient *c, const char *fmt, ...); +void addReplyStatusFormat(redisClient *c, const char *fmt, ...); +#endif + /* List data type */ void listTypeTryConversion(robj *subject, robj *value); void listTypePush(robj *subject, robj *value, int where); @@ -628,11 +704,11 @@ robj *dupStringObject(robj *o); robj *tryObjectEncoding(robj *o); robj *getDecodedObject(robj *o); size_t stringObjectLen(robj *o); -int tryFreeOneObjectFromFreelist(void); robj *createStringObjectFromLongLong(long long value); robj *createListObject(void); robj *createZiplistObject(void); robj *createSetObject(void); +robj *createIntsetObject(void); robj *createHashObject(void); robj *createZsetObject(void); int getLongFromObjectOrReply(redisClient *c, robj *o, long *target, const char *msg); @@ -643,12 +719,28 @@ int getLongLongFromObject(robj *o, long long *target); char *strEncoding(int encoding); int compareStringObjects(robj *a, robj *b); int equalStringObjects(robj *a, robj *b); +unsigned long estimateObjectIdleTime(robj *o); + +/* Synchronous I/O with timeout */ +int syncWrite(int fd, char *ptr, ssize_t size, int timeout); +int syncRead(int fd, char *ptr, ssize_t size, int timeout); +int syncReadLine(int fd, char *ptr, ssize_t size, int timeout); +int fwriteBulkString(FILE *fp, char *s, unsigned long len); +int fwriteBulkDouble(FILE *fp, double d); +int fwriteBulkLongLong(FILE *fp, long long l); +int fwriteBulkObject(FILE *fp, robj *obj); /* Replication */ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc); void replicationFeedMonitors(list *monitors, int dictid, robj **argv, int argc); int syncWithMaster(void); void updateSlavesWaitingBgsave(int bgsaveerr); +void replicationCron(void); + +/* Generic persistence functions */ +void startLoading(FILE *fp); +void loadingProgress(off_t pos); +void stopLoading(void); /* RDB persistence */ int rdbLoad(char *filename); @@ -656,8 +748,8 @@ int rdbSaveBackground(char *filename); void rdbRemoveTempFile(pid_t childpid); int rdbSave(char *filename); int rdbSaveObject(FILE *fp, robj *o); -off_t rdbSavedObjectPages(robj *o, FILE *fp); -off_t rdbSavedObjectLen(robj *o, FILE *fp); +off_t rdbSavedObjectLen(robj *o); +off_t rdbSavedObjectPages(robj *o); robj *rdbLoadObject(int type, FILE *fp); void backgroundSaveDoneHandler(int statloc); @@ -674,13 +766,14 @@ void backgroundRewriteDoneHandler(int statloc); /* Sorted sets data type */ zskiplist *zslCreate(void); void zslFree(zskiplist *zsl); -void zslInsert(zskiplist *zsl, double score, robj *obj); +zskiplistNode *zslInsert(zskiplist *zsl, double score, robj *obj); /* Core functions */ void freeMemoryIfNeeded(void); int processCommand(redisClient *c); void setupSigSegvAction(void); -struct redisCommand *lookupCommand(char *name); +struct redisCommand *lookupCommand(sds name); +struct redisCommand *lookupCommandByCString(char *s); void call(redisClient *c, struct redisCommand *cmd); int prepareForShutdown(); void redisLog(int level, const char *fmt, ...); @@ -688,6 +781,7 @@ void usage(); void updateDictResizePolicy(void); int htNeedsResize(dict *dict); void oom(const char *msg); +void populateCommandTable(void); /* Virtual Memory */ void vmInit(void); @@ -716,6 +810,18 @@ int dontWaitForSwappedKey(redisClient *c, robj *key); void handleClientsBlockedOnSwappedKey(redisDb *db, robj *key); vmpointer *vmSwapObjectBlocking(robj *val); +/* Set data type */ +robj *setTypeCreate(robj *value); +int setTypeAdd(robj *subject, robj *value); +int setTypeRemove(robj *subject, robj *value); +int setTypeIsMember(robj *subject, robj *value); +setTypeIterator *setTypeInitIterator(robj *subject); +void setTypeReleaseIterator(setTypeIterator *si); +robj *setTypeNext(setTypeIterator *si); +robj *setTypeRandomElement(robj *subject); +unsigned long setTypeSize(robj *subject); +void setTypeConvert(robj *subject, int enc); + /* Hash data type */ void convertToRealHash(robj *o); void hashTypeTryConversion(robj *subject, robj **argv, int start, int end); @@ -744,6 +850,8 @@ int stringmatch(const char *pattern, const char *string, int nocase); long long memtoll(const char *p, int *err); int ll2string(char *s, size_t len, long long value); int isStringRepresentableAsLong(sds s, long *longval); +int isStringRepresentableAsLongLong(sds s, long long *longval); +int isObjectRepresentableAsLongLong(robj *o, long long *llongval); /* Configuration */ void loadServerConfig(char *filename); @@ -752,10 +860,10 @@ void resetServerSaveParams(); /* db.c -- Keyspace access API */ int removeExpire(redisDb *db, robj *key); +void propagateExpire(redisDb *db, robj *key); int expireIfNeeded(redisDb *db, robj *key); -int deleteIfVolatile(redisDb *db, robj *key); time_t getExpire(redisDb *db, robj *key); -int setExpire(redisDb *db, robj *key, time_t when); +void setExpire(redisDb *db, robj *key, time_t when); robj *lookupKey(redisDb *db, robj *key); robj *lookupKeyRead(redisDb *db, robj *key); robj *lookupKeyWrite(redisDb *db, robj *key); @@ -830,7 +938,7 @@ void flushdbCommand(redisClient *c); void flushallCommand(redisClient *c); void sortCommand(redisClient *c); void lremCommand(redisClient *c); -void rpoplpushcommand(redisClient *c); +void rpoplpushCommand(redisClient *c); void infoCommand(redisClient *c); void mgetCommand(redisClient *c); void monitorCommand(redisClient *c); @@ -838,6 +946,7 @@ void expireCommand(redisClient *c); void expireatCommand(redisClient *c); void getsetCommand(redisClient *c); void ttlCommand(redisClient *c); +void persistCommand(redisClient *c); void slaveofCommand(redisClient *c); void debugCommand(redisClient *c); void msetCommand(redisClient *c); @@ -846,6 +955,7 @@ void zaddCommand(redisClient *c); void zincrbyCommand(redisClient *c); void zrangeCommand(redisClient *c); void zrangebyscoreCommand(redisClient *c); +void zrevrangebyscoreCommand(redisClient *c); void zcountCommand(redisClient *c); void zrevrangeCommand(redisClient *c); void zcardCommand(redisClient *c); @@ -857,8 +967,10 @@ void execCommand(redisClient *c); void discardCommand(redisClient *c); void blpopCommand(redisClient *c); void brpopCommand(redisClient *c); +void brpoplpushCommand(redisClient *c); void appendCommand(redisClient *c); void substrCommand(redisClient *c); +void strlenCommand(redisClient *c); void zrankCommand(redisClient *c); void zrevrankCommand(redisClient *c); void hsetCommand(redisClient *c); @@ -886,7 +998,6 @@ void watchCommand(redisClient *c); void unwatchCommand(redisClient *c); #if defined(__GNUC__) -void *malloc(size_t size) __attribute__ ((deprecated)); void *calloc(size_t count, size_t size) __attribute__ ((deprecated)); void free(void *ptr) __attribute__ ((deprecated)); void *malloc(size_t size) __attribute__ ((deprecated));