]>
Commit | Line | Data |
---|---|---|
e2641e09 | 1 | #ifndef __REDIS_H |
2 | #define __REDIS_H | |
3 | ||
4 | #include "fmacros.h" | |
5 | #include "config.h" | |
6 | ||
7 | #if defined(__sun) | |
8 | #include "solarisfixes.h" | |
9 | #endif | |
10 | ||
11 | #include <stdio.h> | |
12 | #include <stdlib.h> | |
13 | #include <string.h> | |
14 | #include <time.h> | |
15 | #include <limits.h> | |
16 | #include <unistd.h> | |
17 | #include <errno.h> | |
3688d7f3 | 18 | #include <inttypes.h> |
d06a5b23 | 19 | #include <pthread.h> |
e2641e09 | 20 | |
21 | #include "ae.h" /* Event driven programming library */ | |
22 | #include "sds.h" /* Dynamic safe strings */ | |
23 | #include "dict.h" /* Hash tables */ | |
24 | #include "adlist.h" /* Linked lists */ | |
25 | #include "zmalloc.h" /* total memory usage aware version of malloc/free */ | |
26 | #include "anet.h" /* Networking the easy way */ | |
27 | #include "zipmap.h" /* Compact string -> string data structure */ | |
28 | #include "ziplist.h" /* Compact list data structure */ | |
29 | #include "version.h" | |
30 | ||
31 | /* Error codes */ | |
32 | #define REDIS_OK 0 | |
33 | #define REDIS_ERR -1 | |
34 | ||
35 | /* Static server configuration */ | |
36 | #define REDIS_SERVERPORT 6379 /* TCP port */ | |
37 | #define REDIS_MAXIDLETIME (60*5) /* default client timeout */ | |
38 | #define REDIS_IOBUF_LEN 1024 | |
39 | #define REDIS_LOADBUF_LEN 1024 | |
40 | #define REDIS_STATIC_ARGS 8 | |
41 | #define REDIS_DEFAULT_DBNUM 16 | |
42 | #define REDIS_CONFIGLINE_MAX 1024 | |
43 | #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */ | |
44 | #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */ | |
45 | #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* lookup 10 expires per loop */ | |
46 | #define REDIS_MAX_WRITE_PER_EVENT (1024*64) | |
47 | #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */ | |
48 | #define REDIS_SHARED_INTEGERS 10000 | |
49 | ||
50 | /* If more then REDIS_WRITEV_THRESHOLD write packets are pending use writev */ | |
51 | #define REDIS_WRITEV_THRESHOLD 3 | |
52 | /* Max number of iovecs used for each writev call */ | |
53 | #define REDIS_WRITEV_IOVEC_COUNT 256 | |
54 | ||
55 | /* Hash table parameters */ | |
56 | #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */ | |
57 | ||
58 | /* Command flags */ | |
59 | #define REDIS_CMD_BULK 1 /* Bulk write command */ | |
60 | #define REDIS_CMD_INLINE 2 /* Inline command */ | |
61 | /* REDIS_CMD_DENYOOM reserves a longer comment: all the commands marked with | |
62 | this flags will return an error when the 'maxmemory' option is set in the | |
63 | config file and the server is using more than maxmemory bytes of memory. | |
64 | In short this commands are denied on low memory conditions. */ | |
65 | #define REDIS_CMD_DENYOOM 4 | |
66 | #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */ | |
67 | ||
68 | /* Object types */ | |
69 | #define REDIS_STRING 0 | |
70 | #define REDIS_LIST 1 | |
71 | #define REDIS_SET 2 | |
72 | #define REDIS_ZSET 3 | |
73 | #define REDIS_HASH 4 | |
74 | #define REDIS_VMPOINTER 8 | |
75 | ||
76 | /* Objects encoding. Some kind of objects like Strings and Hashes can be | |
77 | * internally represented in multiple ways. The 'encoding' field of the object | |
78 | * is set to one of this fields for this object. */ | |
79 | #define REDIS_ENCODING_RAW 0 /* Raw representation */ | |
80 | #define REDIS_ENCODING_INT 1 /* Encoded as integer */ | |
81 | #define REDIS_ENCODING_HT 2 /* Encoded as hash table */ | |
82 | #define REDIS_ENCODING_ZIPMAP 3 /* Encoded as zipmap */ | |
83 | #define REDIS_ENCODING_LINKEDLIST 4 /* Encoded as regular linked list */ | |
84 | #define REDIS_ENCODING_ZIPLIST 5 /* Encoded as ziplist */ | |
85 | ||
86 | /* Object types only used for dumping to disk */ | |
87 | #define REDIS_EXPIRETIME 253 | |
88 | #define REDIS_SELECTDB 254 | |
89 | #define REDIS_EOF 255 | |
90 | ||
91 | /* Defines related to the dump file format. To store 32 bits lengths for short | |
92 | * keys requires a lot of space, so we check the most significant 2 bits of | |
93 | * the first byte to interpreter the length: | |
94 | * | |
95 | * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte | |
96 | * 01|000000 00000000 => 01, the len is 14 byes, 6 bits + 8 bits of next byte | |
97 | * 10|000000 [32 bit integer] => if it's 01, a full 32 bit len will follow | |
98 | * 11|000000 this means: specially encoded object will follow. The six bits | |
99 | * number specify the kind of object that follows. | |
100 | * See the REDIS_RDB_ENC_* defines. | |
101 | * | |
102 | * Lenghts up to 63 are stored using a single byte, most DB keys, and may | |
103 | * values, will fit inside. */ | |
104 | #define REDIS_RDB_6BITLEN 0 | |
105 | #define REDIS_RDB_14BITLEN 1 | |
106 | #define REDIS_RDB_32BITLEN 2 | |
107 | #define REDIS_RDB_ENCVAL 3 | |
108 | #define REDIS_RDB_LENERR UINT_MAX | |
109 | ||
110 | /* When a length of a string object stored on disk has the first two bits | |
111 | * set, the remaining two bits specify a special encoding for the object | |
112 | * accordingly to the following defines: */ | |
113 | #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */ | |
114 | #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */ | |
115 | #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */ | |
116 | #define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */ | |
117 | ||
118 | /* Virtual memory object->where field. */ | |
119 | #define REDIS_VM_MEMORY 0 /* The object is on memory */ | |
120 | #define REDIS_VM_SWAPPED 1 /* The object is on disk */ | |
121 | #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */ | |
122 | #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */ | |
123 | ||
124 | /* Virtual memory static configuration stuff. | |
125 | * Check vmFindContiguousPages() to know more about this magic numbers. */ | |
126 | #define REDIS_VM_MAX_NEAR_PAGES 65536 | |
127 | #define REDIS_VM_MAX_RANDOM_JUMP 4096 | |
128 | #define REDIS_VM_MAX_THREADS 32 | |
129 | #define REDIS_THREAD_STACK_SIZE (1024*1024*4) | |
130 | /* The following is the *percentage* of completed I/O jobs to process when the | |
131 | * handelr is called. While Virtual Memory I/O operations are performed by | |
132 | * threads, this operations must be processed by the main thread when completed | |
133 | * in order to take effect. */ | |
134 | #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1 | |
135 | ||
136 | /* Client flags */ | |
137 | #define REDIS_SLAVE 1 /* This client is a slave server */ | |
138 | #define REDIS_MASTER 2 /* This client is a master server */ | |
139 | #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */ | |
140 | #define REDIS_MULTI 8 /* This client is in a MULTI context */ | |
141 | #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */ | |
142 | #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */ | |
143 | #define REDIS_DIRTY_CAS 64 /* Watched keys modified. EXEC will fail. */ | |
144 | ||
145 | /* Slave replication state - slave side */ | |
146 | #define REDIS_REPL_NONE 0 /* No active replication */ | |
147 | #define REDIS_REPL_CONNECT 1 /* Must connect to master */ | |
148 | #define REDIS_REPL_CONNECTED 2 /* Connected to master */ | |
149 | ||
150 | /* Slave replication state - from the point of view of master | |
151 | * Note that in SEND_BULK and ONLINE state the slave receives new updates | |
152 | * in its output queue. In the WAIT_BGSAVE state instead the server is waiting | |
153 | * to start the next background saving in order to send updates to it. */ | |
154 | #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */ | |
155 | #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */ | |
156 | #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */ | |
157 | #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */ | |
158 | ||
159 | /* List related stuff */ | |
160 | #define REDIS_HEAD 0 | |
161 | #define REDIS_TAIL 1 | |
162 | ||
163 | /* Sort operations */ | |
164 | #define REDIS_SORT_GET 0 | |
165 | #define REDIS_SORT_ASC 1 | |
166 | #define REDIS_SORT_DESC 2 | |
167 | #define REDIS_SORTKEY_MAX 1024 | |
168 | ||
169 | /* Log levels */ | |
170 | #define REDIS_DEBUG 0 | |
171 | #define REDIS_VERBOSE 1 | |
172 | #define REDIS_NOTICE 2 | |
173 | #define REDIS_WARNING 3 | |
174 | ||
175 | /* Anti-warning macro... */ | |
176 | #define REDIS_NOTUSED(V) ((void) V) | |
177 | ||
178 | #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */ | |
179 | #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */ | |
180 | ||
181 | /* Append only defines */ | |
182 | #define APPENDFSYNC_NO 0 | |
183 | #define APPENDFSYNC_ALWAYS 1 | |
184 | #define APPENDFSYNC_EVERYSEC 2 | |
185 | ||
186 | /* Zip structure related defaults */ | |
187 | #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64 | |
188 | #define REDIS_HASH_MAX_ZIPMAP_VALUE 512 | |
189 | #define REDIS_LIST_MAX_ZIPLIST_ENTRIES 1024 | |
190 | #define REDIS_LIST_MAX_ZIPLIST_VALUE 32 | |
191 | ||
192 | /* Sets operations codes */ | |
193 | #define REDIS_OP_UNION 0 | |
194 | #define REDIS_OP_DIFF 1 | |
195 | #define REDIS_OP_INTER 2 | |
196 | ||
197 | /* We can print the stacktrace, so our assert is defined this way: */ | |
198 | #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1))) | |
199 | #define redisPanic(_e) _redisPanic(#_e,__FILE__,__LINE__),_exit(1) | |
200 | void _redisAssert(char *estr, char *file, int line); | |
201 | void _redisPanic(char *msg, char *file, int line); | |
202 | ||
203 | /*----------------------------------------------------------------------------- | |
204 | * Data types | |
205 | *----------------------------------------------------------------------------*/ | |
206 | ||
207 | /* A redis object, that is a type able to hold a string / list / set */ | |
208 | ||
209 | /* The actual Redis Object */ | |
210 | typedef struct redisObject { | |
211 | unsigned type:4; | |
212 | unsigned storage:2; /* REDIS_VM_MEMORY or REDIS_VM_SWAPPING */ | |
213 | unsigned encoding:4; | |
214 | unsigned lru:22; /* lru time (relative to server.lruclock) */ | |
215 | int refcount; | |
216 | void *ptr; | |
217 | /* VM fields are only allocated if VM is active, otherwise the | |
218 | * object allocation function will just allocate | |
219 | * sizeof(redisObjct) minus sizeof(redisObjectVM), so using | |
220 | * Redis without VM active will not have any overhead. */ | |
221 | } robj; | |
222 | ||
223 | /* The VM pointer structure - identifies an object in the swap file. | |
224 | * | |
225 | * This object is stored in place of the value | |
226 | * object in the main key->value hash table representing a database. | |
227 | * Note that the first fields (type, storage) are the same as the redisObject | |
228 | * structure so that vmPointer strucuters can be accessed even when casted | |
229 | * as redisObject structures. | |
230 | * | |
231 | * This is useful as we don't know if a value object is or not on disk, but we | |
232 | * are always able to read obj->storage to check this. For vmPointer | |
233 | * structures "type" is set to REDIS_VMPOINTER (even if without this field | |
234 | * is still possible to check the kind of object from the value of 'storage').*/ | |
235 | typedef struct vmPointer { | |
236 | unsigned type:4; | |
237 | unsigned storage:2; /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */ | |
238 | unsigned notused:26; | |
239 | unsigned int vtype; /* type of the object stored in the swap file */ | |
240 | off_t page; /* the page at witch the object is stored on disk */ | |
241 | off_t usedpages; /* number of pages used on disk */ | |
242 | } vmpointer; | |
243 | ||
244 | /* Macro used to initalize a Redis object allocated on the stack. | |
245 | * Note that this macro is taken near the structure definition to make sure | |
246 | * we'll update it when the structure is changed, to avoid bugs like | |
247 | * bug #85 introduced exactly in this way. */ | |
248 | #define initStaticStringObject(_var,_ptr) do { \ | |
249 | _var.refcount = 1; \ | |
250 | _var.type = REDIS_STRING; \ | |
251 | _var.encoding = REDIS_ENCODING_RAW; \ | |
252 | _var.ptr = _ptr; \ | |
253 | _var.storage = REDIS_VM_MEMORY; \ | |
254 | } while(0); | |
255 | ||
256 | typedef struct redisDb { | |
257 | dict *dict; /* The keyspace for this DB */ | |
258 | dict *expires; /* Timeout of keys with a timeout set */ | |
259 | dict *blocking_keys; /* Keys with clients waiting for data (BLPOP) */ | |
260 | dict *io_keys; /* Keys with clients waiting for VM I/O */ | |
261 | dict *watched_keys; /* WATCHED keys for MULTI/EXEC CAS */ | |
262 | int id; | |
263 | } redisDb; | |
264 | ||
265 | /* Client MULTI/EXEC state */ | |
266 | typedef struct multiCmd { | |
267 | robj **argv; | |
268 | int argc; | |
269 | struct redisCommand *cmd; | |
270 | } multiCmd; | |
271 | ||
272 | typedef struct multiState { | |
273 | multiCmd *commands; /* Array of MULTI commands */ | |
274 | int count; /* Total number of MULTI commands */ | |
275 | } multiState; | |
276 | ||
277 | /* With multiplexing we need to take per-clinet state. | |
278 | * Clients are taken in a liked list. */ | |
279 | typedef struct redisClient { | |
280 | int fd; | |
281 | redisDb *db; | |
282 | int dictid; | |
283 | sds querybuf; | |
284 | robj **argv, **mbargv; | |
285 | int argc, mbargc; | |
286 | int bulklen; /* bulk read len. -1 if not in bulk read mode */ | |
287 | int multibulk; /* multi bulk command format active */ | |
288 | list *reply; | |
289 | int sentlen; | |
290 | time_t lastinteraction; /* time of the last interaction, used for timeout */ | |
291 | int flags; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */ | |
292 | int slaveseldb; /* slave selected db, if this client is a slave */ | |
293 | int authenticated; /* when requirepass is non-NULL */ | |
294 | int replstate; /* replication state if this is a slave */ | |
295 | int repldbfd; /* replication DB file descriptor */ | |
296 | long repldboff; /* replication DB file offset */ | |
297 | off_t repldbsize; /* replication DB file size */ | |
298 | multiState mstate; /* MULTI/EXEC state */ | |
299 | robj **blocking_keys; /* The key we are waiting to terminate a blocking | |
300 | * operation such as BLPOP. Otherwise NULL. */ | |
301 | int blocking_keys_num; /* Number of blocking keys */ | |
302 | time_t blockingto; /* Blocking operation timeout. If UNIX current time | |
303 | * is >= blockingto then the operation timed out. */ | |
304 | list *io_keys; /* Keys this client is waiting to be loaded from the | |
305 | * swap file in order to continue. */ | |
306 | list *watched_keys; /* Keys WATCHED for MULTI/EXEC CAS */ | |
307 | dict *pubsub_channels; /* channels a client is interested in (SUBSCRIBE) */ | |
308 | list *pubsub_patterns; /* patterns a client is interested in (SUBSCRIBE) */ | |
309 | } redisClient; | |
310 | ||
311 | struct saveparam { | |
312 | time_t seconds; | |
313 | int changes; | |
314 | }; | |
315 | ||
316 | struct sharedObjectsStruct { | |
317 | robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *cnegone, *pong, *space, | |
318 | *colon, *nullbulk, *nullmultibulk, *queued, | |
319 | *emptymultibulk, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr, | |
320 | *outofrangeerr, *plus, | |
321 | *select0, *select1, *select2, *select3, *select4, | |
322 | *select5, *select6, *select7, *select8, *select9, | |
323 | *messagebulk, *pmessagebulk, *subscribebulk, *unsubscribebulk, *mbulk3, | |
324 | *mbulk4, *psubscribebulk, *punsubscribebulk, | |
325 | *integers[REDIS_SHARED_INTEGERS]; | |
326 | }; | |
327 | ||
328 | /* Global server state structure */ | |
329 | struct redisServer { | |
0e5441d8 | 330 | pthread_t mainthread; |
e2641e09 | 331 | int port; |
332 | int fd; | |
333 | redisDb *db; | |
334 | long long dirty; /* changes to DB from the last save */ | |
335 | list *clients; | |
336 | list *slaves, *monitors; | |
337 | char neterr[ANET_ERR_LEN]; | |
338 | aeEventLoop *el; | |
339 | int cronloops; /* number of times the cron function run */ | |
340 | list *objfreelist; /* A list of freed objects to avoid malloc() */ | |
341 | time_t lastsave; /* Unix time of last save succeeede */ | |
342 | /* Fields used only for stats */ | |
343 | time_t stat_starttime; /* server start time */ | |
344 | long long stat_numcommands; /* number of processed commands */ | |
345 | long long stat_numconnections; /* number of connections received */ | |
346 | long long stat_expiredkeys; /* number of expired keys */ | |
347 | /* Configuration */ | |
348 | int verbosity; | |
349 | int glueoutputbuf; | |
350 | int maxidletime; | |
351 | int dbnum; | |
352 | int daemonize; | |
353 | int appendonly; | |
354 | int appendfsync; | |
355 | int no_appendfsync_on_rewrite; | |
356 | int shutdown_asap; | |
357 | time_t lastfsync; | |
358 | int appendfd; | |
359 | int appendseldb; | |
360 | char *pidfile; | |
361 | pid_t bgsavechildpid; | |
362 | pid_t bgrewritechildpid; | |
363 | sds bgrewritebuf; /* buffer taken by parent during oppend only rewrite */ | |
364 | sds aofbuf; /* AOF buffer, written before entering the event loop */ | |
365 | struct saveparam *saveparams; | |
366 | int saveparamslen; | |
367 | char *logfile; | |
368 | char *bindaddr; | |
369 | char *dbfilename; | |
370 | char *appendfilename; | |
371 | char *requirepass; | |
372 | int rdbcompression; | |
373 | int activerehashing; | |
374 | /* Replication related */ | |
375 | int isslave; | |
376 | char *masterauth; | |
377 | char *masterhost; | |
378 | int masterport; | |
379 | redisClient *master; /* client that is master for this slave */ | |
380 | int replstate; | |
381 | unsigned int maxclients; | |
382 | unsigned long long maxmemory; | |
383 | unsigned int blpop_blocked_clients; | |
384 | unsigned int vm_blocked_clients; | |
385 | /* Sort parameters - qsort_r() is only available under BSD so we | |
386 | * have to take this state global, in order to pass it to sortCompare() */ | |
387 | int sort_desc; | |
388 | int sort_alpha; | |
389 | int sort_bypattern; | |
390 | /* Virtual memory configuration */ | |
391 | int vm_enabled; | |
392 | char *vm_swap_file; | |
393 | off_t vm_page_size; | |
394 | off_t vm_pages; | |
395 | unsigned long long vm_max_memory; | |
396 | /* Zip structure config */ | |
397 | size_t hash_max_zipmap_entries; | |
398 | size_t hash_max_zipmap_value; | |
399 | size_t list_max_ziplist_entries; | |
400 | size_t list_max_ziplist_value; | |
401 | /* Virtual memory state */ | |
402 | FILE *vm_fp; | |
403 | int vm_fd; | |
404 | off_t vm_next_page; /* Next probably empty page */ | |
405 | off_t vm_near_pages; /* Number of pages allocated sequentially */ | |
406 | unsigned char *vm_bitmap; /* Bitmap of free/used pages */ | |
407 | time_t unixtime; /* Unix time sampled every second. */ | |
408 | /* Virtual memory I/O threads stuff */ | |
409 | /* An I/O thread process an element taken from the io_jobs queue and | |
410 | * put the result of the operation in the io_done list. While the | |
411 | * job is being processed, it's put on io_processing queue. */ | |
412 | list *io_newjobs; /* List of VM I/O jobs yet to be processed */ | |
413 | list *io_processing; /* List of VM I/O jobs being processed */ | |
414 | list *io_processed; /* List of VM I/O jobs already processed */ | |
415 | list *io_ready_clients; /* Clients ready to be unblocked. All keys loaded */ | |
416 | pthread_mutex_t io_mutex; /* lock to access io_jobs/io_done/io_thread_job */ | |
417 | pthread_mutex_t obj_freelist_mutex; /* safe redis objects creation/free */ | |
418 | pthread_mutex_t io_swapfile_mutex; /* So we can lseek + write */ | |
419 | pthread_attr_t io_threads_attr; /* attributes for threads creation */ | |
420 | int io_active_threads; /* Number of running I/O threads */ | |
421 | int vm_max_threads; /* Max number of I/O threads running at the same time */ | |
422 | /* Our main thread is blocked on the event loop, locking for sockets ready | |
423 | * to be read or written, so when a threaded I/O operation is ready to be | |
424 | * processed by the main thread, the I/O thread will use a unix pipe to | |
425 | * awake the main thread. The followings are the two pipe FDs. */ | |
426 | int io_ready_pipe_read; | |
427 | int io_ready_pipe_write; | |
428 | /* Virtual memory stats */ | |
429 | unsigned long long vm_stats_used_pages; | |
430 | unsigned long long vm_stats_swapped_objects; | |
431 | unsigned long long vm_stats_swapouts; | |
432 | unsigned long long vm_stats_swapins; | |
433 | /* Pubsub */ | |
434 | dict *pubsub_channels; /* Map channels to list of subscribed clients */ | |
435 | list *pubsub_patterns; /* A list of pubsub_patterns */ | |
436 | /* Misc */ | |
437 | FILE *devnull; | |
438 | unsigned lruclock:22; /* clock incrementing every minute, for LRU */ | |
439 | unsigned lruclock_padding:10; | |
440 | }; | |
441 | ||
442 | typedef struct pubsubPattern { | |
443 | redisClient *client; | |
444 | robj *pattern; | |
445 | } pubsubPattern; | |
446 | ||
447 | typedef void redisCommandProc(redisClient *c); | |
448 | typedef void redisVmPreloadProc(redisClient *c, struct redisCommand *cmd, int argc, robj **argv); | |
449 | struct redisCommand { | |
450 | char *name; | |
451 | redisCommandProc *proc; | |
452 | int arity; | |
453 | int flags; | |
454 | /* Use a function to determine which keys need to be loaded | |
455 | * in the background prior to executing this command. Takes precedence | |
456 | * over vm_firstkey and others, ignored when NULL */ | |
457 | redisVmPreloadProc *vm_preload_proc; | |
458 | /* What keys should be loaded in background when calling this command? */ | |
459 | int vm_firstkey; /* The first argument that's a key (0 = no keys) */ | |
460 | int vm_lastkey; /* THe last argument that's a key */ | |
461 | int vm_keystep; /* The step between first and last key */ | |
462 | }; | |
463 | ||
464 | struct redisFunctionSym { | |
465 | char *name; | |
466 | unsigned long pointer; | |
467 | }; | |
468 | ||
469 | typedef struct _redisSortObject { | |
470 | robj *obj; | |
471 | union { | |
472 | double score; | |
473 | robj *cmpobj; | |
474 | } u; | |
475 | } redisSortObject; | |
476 | ||
477 | typedef struct _redisSortOperation { | |
478 | int type; | |
479 | robj *pattern; | |
480 | } redisSortOperation; | |
481 | ||
482 | /* ZSETs use a specialized version of Skiplists */ | |
e2641e09 | 483 | typedef struct zskiplistNode { |
e2641e09 | 484 | robj *obj; |
2159782b PN |
485 | double score; |
486 | struct zskiplistNode *backward; | |
487 | struct zskiplistLevel { | |
488 | struct zskiplistNode *forward; | |
489 | unsigned int span; | |
490 | } level[]; | |
e2641e09 | 491 | } zskiplistNode; |
492 | ||
493 | typedef struct zskiplist { | |
494 | struct zskiplistNode *header, *tail; | |
495 | unsigned long length; | |
496 | int level; | |
497 | } zskiplist; | |
498 | ||
499 | typedef struct zset { | |
500 | dict *dict; | |
501 | zskiplist *zsl; | |
502 | } zset; | |
503 | ||
504 | /* VM threaded I/O request message */ | |
505 | #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */ | |
506 | #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */ | |
507 | #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */ | |
508 | typedef struct iojob { | |
509 | int type; /* Request type, REDIS_IOJOB_* */ | |
510 | redisDb *db;/* Redis database */ | |
511 | robj *key; /* This I/O request is about swapping this key */ | |
512 | robj *id; /* Unique identifier of this job: | |
513 | this is the object to swap for REDIS_IOREQ_*_SWAP, or the | |
514 | vmpointer objct for REDIS_IOREQ_LOAD. */ | |
515 | robj *val; /* the value to swap for REDIS_IOREQ_*_SWAP, otherwise this | |
516 | * field is populated by the I/O thread for REDIS_IOREQ_LOAD. */ | |
517 | off_t page; /* Swap page where to read/write the object */ | |
518 | off_t pages; /* Swap pages needed to save object. PREPARE_SWAP return val */ | |
519 | int canceled; /* True if this command was canceled by blocking side of VM */ | |
520 | pthread_t thread; /* ID of the thread processing this entry */ | |
521 | } iojob; | |
522 | ||
523 | /* Structure to hold list iteration abstraction. */ | |
524 | typedef struct { | |
525 | robj *subject; | |
526 | unsigned char encoding; | |
527 | unsigned char direction; /* Iteration direction */ | |
528 | unsigned char *zi; | |
529 | listNode *ln; | |
530 | } listTypeIterator; | |
531 | ||
532 | /* Structure for an entry while iterating over a list. */ | |
533 | typedef struct { | |
534 | listTypeIterator *li; | |
535 | unsigned char *zi; /* Entry in ziplist */ | |
536 | listNode *ln; /* Entry in linked list */ | |
537 | } listTypeEntry; | |
538 | ||
539 | /* Structure to hold hash iteration abstration. Note that iteration over | |
540 | * hashes involves both fields and values. Because it is possible that | |
541 | * not both are required, store pointers in the iterator to avoid | |
542 | * unnecessary memory allocation for fields/values. */ | |
543 | typedef struct { | |
544 | int encoding; | |
545 | unsigned char *zi; | |
546 | unsigned char *zk, *zv; | |
547 | unsigned int zklen, zvlen; | |
548 | ||
549 | dictIterator *di; | |
550 | dictEntry *de; | |
551 | } hashTypeIterator; | |
552 | ||
553 | #define REDIS_HASH_KEY 1 | |
554 | #define REDIS_HASH_VALUE 2 | |
555 | ||
556 | /*----------------------------------------------------------------------------- | |
557 | * Extern declarations | |
558 | *----------------------------------------------------------------------------*/ | |
559 | ||
560 | extern struct redisServer server; | |
561 | extern struct sharedObjectsStruct shared; | |
562 | extern dictType setDictType; | |
563 | extern dictType zsetDictType; | |
564 | extern double R_Zero, R_PosInf, R_NegInf, R_Nan; | |
565 | dictType hashDictType; | |
566 | ||
567 | /*----------------------------------------------------------------------------- | |
568 | * Functions prototypes | |
569 | *----------------------------------------------------------------------------*/ | |
570 | ||
571 | /* networking.c -- Networking and Client related operations */ | |
572 | redisClient *createClient(int fd); | |
573 | void closeTimedoutClients(void); | |
574 | void freeClient(redisClient *c); | |
575 | void resetClient(redisClient *c); | |
576 | void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask); | |
577 | void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask); | |
578 | void addReply(redisClient *c, robj *obj); | |
579 | void addReplySds(redisClient *c, sds s); | |
580 | void processInputBuffer(redisClient *c); | |
581 | void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask); | |
582 | void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask); | |
583 | void addReplyBulk(redisClient *c, robj *obj); | |
584 | void addReplyBulkCString(redisClient *c, char *s); | |
585 | void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask); | |
586 | void addReply(redisClient *c, robj *obj); | |
587 | void addReplySds(redisClient *c, sds s); | |
588 | void addReplyDouble(redisClient *c, double d); | |
589 | void addReplyLongLong(redisClient *c, long long ll); | |
590 | void addReplyUlong(redisClient *c, unsigned long ul); | |
591 | void *dupClientReplyValue(void *o); | |
592 | ||
593 | /* List data type */ | |
594 | void listTypeTryConversion(robj *subject, robj *value); | |
595 | void listTypePush(robj *subject, robj *value, int where); | |
596 | robj *listTypePop(robj *subject, int where); | |
597 | unsigned long listTypeLength(robj *subject); | |
598 | listTypeIterator *listTypeInitIterator(robj *subject, int index, unsigned char direction); | |
599 | void listTypeReleaseIterator(listTypeIterator *li); | |
600 | int listTypeNext(listTypeIterator *li, listTypeEntry *entry); | |
601 | robj *listTypeGet(listTypeEntry *entry); | |
602 | void listTypeInsert(listTypeEntry *entry, robj *value, int where); | |
603 | int listTypeEqual(listTypeEntry *entry, robj *o); | |
604 | void listTypeDelete(listTypeEntry *entry); | |
605 | void listTypeConvert(robj *subject, int enc); | |
606 | void unblockClientWaitingData(redisClient *c); | |
607 | int handleClientsWaitingListPush(redisClient *c, robj *key, robj *ele); | |
608 | void popGenericCommand(redisClient *c, int where); | |
609 | ||
610 | /* MULTI/EXEC/WATCH... */ | |
611 | void unwatchAllKeys(redisClient *c); | |
612 | void initClientMultiState(redisClient *c); | |
613 | void freeClientMultiState(redisClient *c); | |
614 | void queueMultiCommand(redisClient *c, struct redisCommand *cmd); | |
615 | void touchWatchedKey(redisDb *db, robj *key); | |
616 | void touchWatchedKeysOnFlush(int dbid); | |
617 | ||
618 | /* Redis object implementation */ | |
619 | void decrRefCount(void *o); | |
620 | void incrRefCount(robj *o); | |
621 | void freeStringObject(robj *o); | |
622 | void freeListObject(robj *o); | |
623 | void freeSetObject(robj *o); | |
624 | void freeZsetObject(robj *o); | |
625 | void freeHashObject(robj *o); | |
626 | robj *createObject(int type, void *ptr); | |
627 | robj *createStringObject(char *ptr, size_t len); | |
628 | robj *dupStringObject(robj *o); | |
629 | robj *tryObjectEncoding(robj *o); | |
630 | robj *getDecodedObject(robj *o); | |
631 | size_t stringObjectLen(robj *o); | |
632 | int tryFreeOneObjectFromFreelist(void); | |
633 | robj *createStringObjectFromLongLong(long long value); | |
634 | robj *createListObject(void); | |
635 | robj *createZiplistObject(void); | |
636 | robj *createSetObject(void); | |
637 | robj *createHashObject(void); | |
638 | robj *createZsetObject(void); | |
639 | int getLongFromObjectOrReply(redisClient *c, robj *o, long *target, const char *msg); | |
640 | int checkType(redisClient *c, robj *o, int type); | |
641 | int getLongLongFromObjectOrReply(redisClient *c, robj *o, long long *target, const char *msg); | |
642 | int getDoubleFromObjectOrReply(redisClient *c, robj *o, double *target, const char *msg); | |
643 | int getLongLongFromObject(robj *o, long long *target); | |
644 | char *strEncoding(int encoding); | |
645 | int compareStringObjects(robj *a, robj *b); | |
646 | int equalStringObjects(robj *a, robj *b); | |
647 | ||
648 | /* Replication */ | |
649 | void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc); | |
650 | void replicationFeedMonitors(list *monitors, int dictid, robj **argv, int argc); | |
651 | int syncWithMaster(void); | |
652 | void updateSlavesWaitingBgsave(int bgsaveerr); | |
653 | ||
654 | /* RDB persistence */ | |
655 | int rdbLoad(char *filename); | |
656 | int rdbSaveBackground(char *filename); | |
657 | void rdbRemoveTempFile(pid_t childpid); | |
658 | int rdbSave(char *filename); | |
659 | int rdbSaveObject(FILE *fp, robj *o); | |
660 | off_t rdbSavedObjectPages(robj *o, FILE *fp); | |
661 | off_t rdbSavedObjectLen(robj *o, FILE *fp); | |
662 | robj *rdbLoadObject(int type, FILE *fp); | |
663 | void backgroundSaveDoneHandler(int statloc); | |
664 | ||
665 | /* AOF persistence */ | |
666 | void flushAppendOnlyFile(void); | |
667 | void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc); | |
668 | void aofRemoveTempFile(pid_t childpid); | |
669 | int rewriteAppendOnlyFileBackground(void); | |
670 | int loadAppendOnlyFile(char *filename); | |
671 | void stopAppendOnly(void); | |
672 | int startAppendOnly(void); | |
673 | void backgroundRewriteDoneHandler(int statloc); | |
674 | ||
675 | /* Sorted sets data type */ | |
676 | zskiplist *zslCreate(void); | |
677 | void zslFree(zskiplist *zsl); | |
69ef89f2 | 678 | zskiplistNode *zslInsert(zskiplist *zsl, double score, robj *obj); |
e2641e09 | 679 | |
680 | /* Core functions */ | |
681 | void freeMemoryIfNeeded(void); | |
682 | int processCommand(redisClient *c); | |
683 | void setupSigSegvAction(void); | |
684 | struct redisCommand *lookupCommand(char *name); | |
685 | void call(redisClient *c, struct redisCommand *cmd); | |
686 | int prepareForShutdown(); | |
687 | void redisLog(int level, const char *fmt, ...); | |
688 | void usage(); | |
689 | void updateDictResizePolicy(void); | |
690 | int htNeedsResize(dict *dict); | |
691 | void oom(const char *msg); | |
692 | ||
693 | /* Virtual Memory */ | |
694 | void vmInit(void); | |
695 | void vmMarkPagesFree(off_t page, off_t count); | |
696 | robj *vmLoadObject(robj *o); | |
697 | robj *vmPreviewObject(robj *o); | |
698 | int vmSwapOneObjectBlocking(void); | |
699 | int vmSwapOneObjectThreaded(void); | |
700 | int vmCanSwapOut(void); | |
701 | void vmThreadedIOCompletedJob(aeEventLoop *el, int fd, void *privdata, int mask); | |
702 | void vmCancelThreadedIOJob(robj *o); | |
703 | void lockThreadedIO(void); | |
704 | void unlockThreadedIO(void); | |
705 | int vmSwapObjectThreaded(robj *key, robj *val, redisDb *db); | |
706 | void freeIOJob(iojob *j); | |
707 | void queueIOJob(iojob *j); | |
708 | int vmWriteObjectOnSwap(robj *o, off_t page); | |
709 | robj *vmReadObjectFromSwap(off_t page, int type); | |
710 | void waitEmptyIOJobsQueue(void); | |
711 | void vmReopenSwapFile(void); | |
712 | int vmFreePage(off_t page); | |
713 | void zunionInterBlockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv); | |
714 | void execBlockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv); | |
715 | int blockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd); | |
716 | int dontWaitForSwappedKey(redisClient *c, robj *key); | |
717 | void handleClientsBlockedOnSwappedKey(redisDb *db, robj *key); | |
718 | vmpointer *vmSwapObjectBlocking(robj *val); | |
719 | ||
720 | /* Hash data type */ | |
721 | void convertToRealHash(robj *o); | |
722 | void hashTypeTryConversion(robj *subject, robj **argv, int start, int end); | |
723 | void hashTypeTryObjectEncoding(robj *subject, robj **o1, robj **o2); | |
724 | robj *hashTypeGet(robj *o, robj *key); | |
725 | int hashTypeExists(robj *o, robj *key); | |
726 | int hashTypeSet(robj *o, robj *key, robj *value); | |
727 | int hashTypeDelete(robj *o, robj *key); | |
728 | unsigned long hashTypeLength(robj *o); | |
729 | hashTypeIterator *hashTypeInitIterator(robj *subject); | |
730 | void hashTypeReleaseIterator(hashTypeIterator *hi); | |
731 | int hashTypeNext(hashTypeIterator *hi); | |
732 | robj *hashTypeCurrent(hashTypeIterator *hi, int what); | |
733 | robj *hashTypeLookupWriteOrCreate(redisClient *c, robj *key); | |
734 | ||
735 | /* Pub / Sub */ | |
736 | int pubsubUnsubscribeAllChannels(redisClient *c, int notify); | |
737 | int pubsubUnsubscribeAllPatterns(redisClient *c, int notify); | |
738 | void freePubsubPattern(void *p); | |
739 | int listMatchPubsubPattern(void *a, void *b); | |
740 | ||
741 | /* Utility functions */ | |
742 | int stringmatchlen(const char *pattern, int patternLen, | |
743 | const char *string, int stringLen, int nocase); | |
744 | int stringmatch(const char *pattern, const char *string, int nocase); | |
745 | long long memtoll(const char *p, int *err); | |
746 | int ll2string(char *s, size_t len, long long value); | |
747 | int isStringRepresentableAsLong(sds s, long *longval); | |
748 | ||
749 | /* Configuration */ | |
750 | void loadServerConfig(char *filename); | |
751 | void appendServerSaveParams(time_t seconds, int changes); | |
752 | void resetServerSaveParams(); | |
753 | ||
754 | /* db.c -- Keyspace access API */ | |
755 | int removeExpire(redisDb *db, robj *key); | |
756 | int expireIfNeeded(redisDb *db, robj *key); | |
757 | int deleteIfVolatile(redisDb *db, robj *key); | |
758 | time_t getExpire(redisDb *db, robj *key); | |
759 | int setExpire(redisDb *db, robj *key, time_t when); | |
760 | robj *lookupKey(redisDb *db, robj *key); | |
761 | robj *lookupKeyRead(redisDb *db, robj *key); | |
762 | robj *lookupKeyWrite(redisDb *db, robj *key); | |
763 | robj *lookupKeyReadOrReply(redisClient *c, robj *key, robj *reply); | |
764 | robj *lookupKeyWriteOrReply(redisClient *c, robj *key, robj *reply); | |
765 | int dbAdd(redisDb *db, robj *key, robj *val); | |
766 | int dbReplace(redisDb *db, robj *key, robj *val); | |
767 | int dbExists(redisDb *db, robj *key); | |
768 | robj *dbRandomKey(redisDb *db); | |
769 | int dbDelete(redisDb *db, robj *key); | |
770 | long long emptyDb(); | |
771 | int selectDb(redisClient *c, int id); | |
772 | ||
773 | /* Git SHA1 */ | |
774 | char *redisGitSHA1(void); | |
775 | char *redisGitDirty(void); | |
776 | ||
777 | /* Commands prototypes */ | |
778 | void authCommand(redisClient *c); | |
779 | void pingCommand(redisClient *c); | |
780 | void echoCommand(redisClient *c); | |
781 | void setCommand(redisClient *c); | |
782 | void setnxCommand(redisClient *c); | |
783 | void setexCommand(redisClient *c); | |
784 | void getCommand(redisClient *c); | |
785 | void delCommand(redisClient *c); | |
786 | void existsCommand(redisClient *c); | |
787 | void incrCommand(redisClient *c); | |
788 | void decrCommand(redisClient *c); | |
789 | void incrbyCommand(redisClient *c); | |
790 | void decrbyCommand(redisClient *c); | |
791 | void selectCommand(redisClient *c); | |
792 | void randomkeyCommand(redisClient *c); | |
793 | void keysCommand(redisClient *c); | |
794 | void dbsizeCommand(redisClient *c); | |
795 | void lastsaveCommand(redisClient *c); | |
796 | void saveCommand(redisClient *c); | |
797 | void bgsaveCommand(redisClient *c); | |
798 | void bgrewriteaofCommand(redisClient *c); | |
799 | void shutdownCommand(redisClient *c); | |
800 | void moveCommand(redisClient *c); | |
801 | void renameCommand(redisClient *c); | |
802 | void renamenxCommand(redisClient *c); | |
803 | void lpushCommand(redisClient *c); | |
804 | void rpushCommand(redisClient *c); | |
805 | void lpushxCommand(redisClient *c); | |
806 | void rpushxCommand(redisClient *c); | |
807 | void linsertCommand(redisClient *c); | |
808 | void lpopCommand(redisClient *c); | |
809 | void rpopCommand(redisClient *c); | |
810 | void llenCommand(redisClient *c); | |
811 | void lindexCommand(redisClient *c); | |
812 | void lrangeCommand(redisClient *c); | |
813 | void ltrimCommand(redisClient *c); | |
814 | void typeCommand(redisClient *c); | |
815 | void lsetCommand(redisClient *c); | |
816 | void saddCommand(redisClient *c); | |
817 | void sremCommand(redisClient *c); | |
818 | void smoveCommand(redisClient *c); | |
819 | void sismemberCommand(redisClient *c); | |
820 | void scardCommand(redisClient *c); | |
821 | void spopCommand(redisClient *c); | |
822 | void srandmemberCommand(redisClient *c); | |
823 | void sinterCommand(redisClient *c); | |
824 | void sinterstoreCommand(redisClient *c); | |
825 | void sunionCommand(redisClient *c); | |
826 | void sunionstoreCommand(redisClient *c); | |
827 | void sdiffCommand(redisClient *c); | |
828 | void sdiffstoreCommand(redisClient *c); | |
829 | void syncCommand(redisClient *c); | |
830 | void flushdbCommand(redisClient *c); | |
831 | void flushallCommand(redisClient *c); | |
832 | void sortCommand(redisClient *c); | |
833 | void lremCommand(redisClient *c); | |
834 | void rpoplpushcommand(redisClient *c); | |
835 | void infoCommand(redisClient *c); | |
836 | void mgetCommand(redisClient *c); | |
837 | void monitorCommand(redisClient *c); | |
838 | void expireCommand(redisClient *c); | |
839 | void expireatCommand(redisClient *c); | |
840 | void getsetCommand(redisClient *c); | |
841 | void ttlCommand(redisClient *c); | |
842 | void slaveofCommand(redisClient *c); | |
843 | void debugCommand(redisClient *c); | |
844 | void msetCommand(redisClient *c); | |
845 | void msetnxCommand(redisClient *c); | |
846 | void zaddCommand(redisClient *c); | |
847 | void zincrbyCommand(redisClient *c); | |
848 | void zrangeCommand(redisClient *c); | |
849 | void zrangebyscoreCommand(redisClient *c); | |
850 | void zcountCommand(redisClient *c); | |
851 | void zrevrangeCommand(redisClient *c); | |
852 | void zcardCommand(redisClient *c); | |
853 | void zremCommand(redisClient *c); | |
854 | void zscoreCommand(redisClient *c); | |
855 | void zremrangebyscoreCommand(redisClient *c); | |
856 | void multiCommand(redisClient *c); | |
857 | void execCommand(redisClient *c); | |
858 | void discardCommand(redisClient *c); | |
859 | void blpopCommand(redisClient *c); | |
860 | void brpopCommand(redisClient *c); | |
861 | void appendCommand(redisClient *c); | |
862 | void substrCommand(redisClient *c); | |
80091bba | 863 | void strlenCommand(redisClient *c); |
e2641e09 | 864 | void zrankCommand(redisClient *c); |
865 | void zrevrankCommand(redisClient *c); | |
866 | void hsetCommand(redisClient *c); | |
867 | void hsetnxCommand(redisClient *c); | |
868 | void hgetCommand(redisClient *c); | |
869 | void hmsetCommand(redisClient *c); | |
870 | void hmgetCommand(redisClient *c); | |
871 | void hdelCommand(redisClient *c); | |
872 | void hlenCommand(redisClient *c); | |
873 | void zremrangebyrankCommand(redisClient *c); | |
874 | void zunionstoreCommand(redisClient *c); | |
875 | void zinterstoreCommand(redisClient *c); | |
876 | void hkeysCommand(redisClient *c); | |
877 | void hvalsCommand(redisClient *c); | |
878 | void hgetallCommand(redisClient *c); | |
879 | void hexistsCommand(redisClient *c); | |
880 | void configCommand(redisClient *c); | |
881 | void hincrbyCommand(redisClient *c); | |
882 | void subscribeCommand(redisClient *c); | |
883 | void unsubscribeCommand(redisClient *c); | |
884 | void psubscribeCommand(redisClient *c); | |
885 | void punsubscribeCommand(redisClient *c); | |
886 | void publishCommand(redisClient *c); | |
887 | void watchCommand(redisClient *c); | |
888 | void unwatchCommand(redisClient *c); | |
889 | ||
b3aa6d71 | 890 | #if defined(__GNUC__) |
b3aa6d71 | 891 | void *calloc(size_t count, size_t size) __attribute__ ((deprecated)); |
892 | void free(void *ptr) __attribute__ ((deprecated)); | |
893 | void *malloc(size_t size) __attribute__ ((deprecated)); | |
894 | void *realloc(void *ptr, size_t size) __attribute__ ((deprecated)); | |
895 | #endif | |
896 | ||
e2641e09 | 897 | #endif |