]> git.saurik.com Git - redis.git/blame - src/bio.c
Hash function switched to murmurhash2.
[redis.git] / src / bio.c
CommitLineData
02925dd9 1/* Background I/O service for Redis.
2 *
3 * This file implements operations that we need to perform in the background.
4 * Currently there is only a single operation, that is a background close(2)
5 * system call. This is needed as when the process is the last owner of a
6 * reference to a file closing it means unlinking it, and the deletion of the
7 * file is slow, blocking the server.
8 *
9 * In the future we'll either continue implementing new things we need or
10 * we'll switch to libeio. However there are probably long term uses for this
11 * file as we may want to put here Redis specific background tasks (for instance
12 * it is not impossible that we'll need a non blocking FLUSHDB/FLUSHALL
13 * implementation).
14 *
15 * DESIGN
16 * ------
17 *
18 * The design is trivial, we have a structure representing a job to perform
50be9b97 19 * and a different thread and job queue for every job type.
20 * Every thread waits for new jobs in its queue, and processes every job
21 * sequentially.
22 *
fbb23ce4 23 * Jobs of the same type are guaranteed to be processed from the least
24 * recently inserted to the most recently inserted (older jobs processed
25 * first).
26 *
02925dd9 27 * Currently there is no way for the creator of the job to be notified about
28 * the completion of the operation, this will only be added when/if needed.
29 */
30
31#include "redis.h"
32#include "bio.h"
33
50be9b97 34static pthread_mutex_t bio_mutex[REDIS_BIO_NUM_OPS];
35static pthread_cond_t bio_condvar[REDIS_BIO_NUM_OPS];
36static list *bio_jobs[REDIS_BIO_NUM_OPS];
fde4e4c4 37/* The following array is used to hold the number of pending jobs for every
38 * OP type. This allows us to export the bioPendingJobsOfType() API that is
39 * useful when the main thread wants to perform some operation that may involve
40 * objects shared with the background thread. The main thread will just wait
41 * that there are no longer jobs of this type to be executed before performing
42 * the sensible operation. This data is also useful for reporting. */
50be9b97 43static unsigned long long bio_pending[REDIS_BIO_NUM_OPS];
02925dd9 44
/* A single queued background job. The structure is private to this file:
 * the API does not expose its internals at all. */
struct bio_job {
    time_t time;    /* Time at which the job was created. */
    /* Job specific argument pointers. A job needing more than three
     * arguments can pass a pointer to a structure or alike. */
    void *arg1;
    void *arg2;
    void *arg3;
};
02925dd9 53
/* Start routine of the background threads, defined later in this file. */
void *bioProcessBackgroundJobs(void *arg);

/* Minimum stack size (4 MB) given to the background threads, so that they
 * have enough stack to perform all the things we do in the main thread. */
#define REDIS_THREAD_STACK_SIZE (4*1024*1024)
59
02925dd9 60/* Initialize the background system, spawning the thread. */
61void bioInit(void) {
62 pthread_attr_t attr;
63 pthread_t thread;
64 size_t stacksize;
fde4e4c4 65 int j;
02925dd9 66
fde4e4c4 67 /* Initialization of state vars and objects */
50be9b97 68 for (j = 0; j < REDIS_BIO_NUM_OPS; j++) {
69 pthread_mutex_init(&bio_mutex[j],NULL);
70 pthread_cond_init(&bio_condvar[j],NULL);
71 bio_jobs[j] = listCreate();
72 bio_pending[j] = 0;
73 }
02925dd9 74
75 /* Set the stack size as by default it may be small in some system */
76 pthread_attr_init(&attr);
f81a5f54 77 pthread_attr_getstacksize(&attr,&stacksize);
02925dd9 78 if (!stacksize) stacksize = 1; /* The world is full of Solaris Fixes */
79 while (stacksize < REDIS_THREAD_STACK_SIZE) stacksize *= 2;
80 pthread_attr_setstacksize(&attr, stacksize);
81
50be9b97 82 /* Ready to spawn our threads. We use the single argument the thread
83 * function accepts in order to pass the job ID the thread is
84 * responsible of. */
85 for (j = 0; j < REDIS_BIO_NUM_OPS; j++) {
86 void *arg = (void*)(unsigned long) j;
87 if (pthread_create(&thread,&attr,bioProcessBackgroundJobs,arg) != 0) {
88 redisLog(REDIS_WARNING,"Fatal: Can't initialize Background Jobs.");
89 exit(1);
90 }
02925dd9 91 }
92}
93
50be9b97 94void bioCreateBackgroundJob(int type, void *arg1, void *arg2, void *arg3) {
02925dd9 95 struct bio_job *job = zmalloc(sizeof(*job));
96
50be9b97 97 job->time = time(NULL);
98 job->arg1 = arg1;
99 job->arg2 = arg2;
100 job->arg3 = arg3;
101 pthread_mutex_lock(&bio_mutex[type]);
102 listAddNodeTail(bio_jobs[type],job);
fde4e4c4 103 bio_pending[type]++;
50be9b97 104 pthread_cond_signal(&bio_condvar[type]);
105 pthread_mutex_unlock(&bio_mutex[type]);
02925dd9 106}
107
108void *bioProcessBackgroundJobs(void *arg) {
109 struct bio_job *job;
50be9b97 110 unsigned long type = (unsigned long) arg;
aa96122d 111 sigset_t sigset;
02925dd9 112
113 pthread_detach(pthread_self());
50be9b97 114 pthread_mutex_lock(&bio_mutex[type]);
aa96122d 115 /* Block SIGALRM so we are sure that only the main thread will
116 * receive the watchdog signal. */
117 sigemptyset(&sigset);
118 sigaddset(&sigset, SIGALRM);
119 if (pthread_sigmask(SIG_BLOCK, &sigset, NULL))
120 redisLog(REDIS_WARNING,
121 "Warning: can't mask SIGALRM in bio.c thread: %s", strerror(errno));
122
02925dd9 123 while(1) {
124 listNode *ln;
125
126 /* The loop always starts with the lock hold. */
50be9b97 127 if (listLength(bio_jobs[type]) == 0) {
128 pthread_cond_wait(&bio_condvar[type],&bio_mutex[type]);
02925dd9 129 continue;
130 }
131 /* Pop the job from the queue. */
50be9b97 132 ln = listFirst(bio_jobs[type]);
02925dd9 133 job = ln->value;
02925dd9 134 /* It is now possible to unlock the background system as we know have
135 * a stand alone job structure to process.*/
50be9b97 136 pthread_mutex_unlock(&bio_mutex[type]);
02925dd9 137
138 /* Process the job accordingly to its type. */
fde4e4c4 139 if (type == REDIS_BIO_CLOSE_FILE) {
50be9b97 140 close((long)job->arg1);
9fc1e1b1 141 } else if (type == REDIS_BIO_AOF_FSYNC) {
a60b397b 142 aof_fsync((long)job->arg1);
02925dd9 143 } else {
144 redisPanic("Wrong job type in bioProcessBackgroundJobs().");
145 }
146 zfree(job);
147
148 /* Lock again before reiterating the loop, if there are no longer
149 * jobs to process we'll block again in pthread_cond_wait(). */
50be9b97 150 pthread_mutex_lock(&bio_mutex[type]);
1317b7c2 151 listDelNode(bio_jobs[type],ln);
fde4e4c4 152 bio_pending[type]--;
153 }
154}
155
156/* Return the number of pending jobs of the specified type. */
157unsigned long long bioPendingJobsOfType(int type) {
158 unsigned long long val;
50be9b97 159 pthread_mutex_lock(&bio_mutex[type]);
fde4e4c4 160 val = bio_pending[type];
50be9b97 161 pthread_mutex_unlock(&bio_mutex[type]);
fde4e4c4 162 return val;
163}
164
47e7f9ac 165#if 0 /* We don't use the following code for now, and bioWaitPendingJobsLE
166 probably needs a rewrite using conditional variables instead of the
167 current implementation. */
168
169
/* Block until the number of pending jobs of the specified type is less than
 * or equal to 'num'.
 *
 * This function may block for a long time, so it should only be used for
 * the following tasks:
 *
 * 1) To avoid that the main thread pushes jobs of a given type so fast that
 *    the background thread can't process them at the same speed. Before
 *    creating a new job of a given type the main thread should call
 *    something like: bioWaitPendingJobsLE(job_type,10000);
 * 2) To perform special operations that make it necessary to be sure that
 *    no one is touching shared resources in the background.
 */
void bioWaitPendingJobsLE(int type, unsigned long long num) {
    unsigned long long attempts = 0;

    /* The first 1000 checks are performed back to back, the following ones
     * up to the 10000th are preceded by a 100 microseconds sleep, and every
     * later check by a 1000 microseconds sleep, so that the polling gets
     * less aggressive the longer we have to wait. */
    do {
        attempts++;
        if (attempts > 10000) {
            usleep(1000);
        } else if (attempts > 1000) {
            usleep(100);
        }
    } while (bioPendingJobsOfType(type) > num);
}
50be9b97 198
199/* Return the older job of the specified type. */
200time_t bioOlderJobOfType(int type) {
201 time_t time;
202 listNode *ln;
203 struct bio_job *job;
204
205 pthread_mutex_lock(&bio_mutex[type]);
206 ln = listFirst(bio_jobs[type]);
b39a4d0b 207 if (ln == NULL) {
208 pthread_mutex_unlock(&bio_mutex[type]);
209 return 0;
210 }
50be9b97 211 job = ln->value;
212 time = job->time;
213 pthread_mutex_unlock(&bio_mutex[type]);
214 return time;
215}
216
47e7f9ac 217#endif