]> git.saurik.com Git - redis.git/blob - src/ae.c
More robust handling of AOF rewrite child.
[redis.git] / src / ae.c
1 /* A simple event-driven programming library. Originally I wrote this code
2 * for the Jim's event-loop (Jim is a Tcl interpreter) but later translated
3 * it in form of a library for easy reuse.
4 *
5 * Copyright (c) 2006-2010, Salvatore Sanfilippo <antirez at gmail dot com>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
10 *
11 * * Redistributions of source code must retain the above copyright notice,
12 * this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * * Neither the name of Redis nor the names of its contributors may be used
17 * to endorse or promote products derived from this software without
18 * specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #include <stdio.h>
34 #include <sys/time.h>
35 #include <sys/types.h>
36 #include <unistd.h>
37 #include <stdlib.h>
38 #include <poll.h>
39 #include <string.h>
40 #include <time.h>
41
42 #include "ae.h"
43 #include "zmalloc.h"
44 #include "config.h"
45
46 /* Include the best multiplexing layer supported by this system.
47 * The following should be ordered by performances, descending. */
48 #ifdef HAVE_EVPORT
49 #include "ae_evport.c"
50 #else
51 #ifdef HAVE_EPOLL
52 #include "ae_epoll.c"
53 #else
54 #ifdef HAVE_KQUEUE
55 #include "ae_kqueue.c"
56 #else
57 #include "ae_select.c"
58 #endif
59 #endif
60 #endif
61
62 aeEventLoop *aeCreateEventLoop(int setsize) {
63 aeEventLoop *eventLoop;
64 int i;
65
66 if ((eventLoop = zmalloc(sizeof(*eventLoop))) == NULL) goto err;
67 eventLoop->events = zmalloc(sizeof(aeFileEvent)*setsize);
68 eventLoop->fired = zmalloc(sizeof(aeFiredEvent)*setsize);
69 if (eventLoop->events == NULL || eventLoop->fired == NULL) goto err;
70 eventLoop->setsize = setsize;
71 eventLoop->lastTime = time(NULL);
72 eventLoop->timeEventHead = NULL;
73 eventLoop->timeEventNextId = 0;
74 eventLoop->stop = 0;
75 eventLoop->maxfd = -1;
76 eventLoop->beforesleep = NULL;
77 if (aeApiCreate(eventLoop) == -1) goto err;
78 /* Events with mask == AE_NONE are not set. So let's initialize the
79 * vector with it. */
80 for (i = 0; i < setsize; i++)
81 eventLoop->events[i].mask = AE_NONE;
82 return eventLoop;
83
84 err:
85 if (eventLoop) {
86 zfree(eventLoop->events);
87 zfree(eventLoop->fired);
88 zfree(eventLoop);
89 }
90 return NULL;
91 }
92
93 void aeDeleteEventLoop(aeEventLoop *eventLoop) {
94 aeApiFree(eventLoop);
95 zfree(eventLoop->events);
96 zfree(eventLoop->fired);
97 zfree(eventLoop);
98 }
99
100 void aeStop(aeEventLoop *eventLoop) {
101 eventLoop->stop = 1;
102 }
103
104 int aeCreateFileEvent(aeEventLoop *eventLoop, int fd, int mask,
105 aeFileProc *proc, void *clientData)
106 {
107 if (fd >= eventLoop->setsize) return AE_ERR;
108 aeFileEvent *fe = &eventLoop->events[fd];
109
110 if (aeApiAddEvent(eventLoop, fd, mask) == -1)
111 return AE_ERR;
112 fe->mask |= mask;
113 if (mask & AE_READABLE) fe->rfileProc = proc;
114 if (mask & AE_WRITABLE) fe->wfileProc = proc;
115 fe->clientData = clientData;
116 if (fd > eventLoop->maxfd)
117 eventLoop->maxfd = fd;
118 return AE_OK;
119 }
120
121 void aeDeleteFileEvent(aeEventLoop *eventLoop, int fd, int mask)
122 {
123 if (fd >= eventLoop->setsize) return;
124 aeFileEvent *fe = &eventLoop->events[fd];
125
126 if (fe->mask == AE_NONE) return;
127 fe->mask = fe->mask & (~mask);
128 if (fd == eventLoop->maxfd && fe->mask == AE_NONE) {
129 /* Update the max fd */
130 int j;
131
132 for (j = eventLoop->maxfd-1; j >= 0; j--)
133 if (eventLoop->events[j].mask != AE_NONE) break;
134 eventLoop->maxfd = j;
135 }
136 aeApiDelEvent(eventLoop, fd, mask);
137 }
138
139 int aeGetFileEvents(aeEventLoop *eventLoop, int fd) {
140 if (fd >= eventLoop->setsize) return 0;
141 aeFileEvent *fe = &eventLoop->events[fd];
142
143 return fe->mask;
144 }
145
146 static void aeGetTime(long *seconds, long *milliseconds)
147 {
148 struct timeval tv;
149
150 gettimeofday(&tv, NULL);
151 *seconds = tv.tv_sec;
152 *milliseconds = tv.tv_usec/1000;
153 }
154
155 static void aeAddMillisecondsToNow(long long milliseconds, long *sec, long *ms) {
156 long cur_sec, cur_ms, when_sec, when_ms;
157
158 aeGetTime(&cur_sec, &cur_ms);
159 when_sec = cur_sec + milliseconds/1000;
160 when_ms = cur_ms + milliseconds%1000;
161 if (when_ms >= 1000) {
162 when_sec ++;
163 when_ms -= 1000;
164 }
165 *sec = when_sec;
166 *ms = when_ms;
167 }
168
169 long long aeCreateTimeEvent(aeEventLoop *eventLoop, long long milliseconds,
170 aeTimeProc *proc, void *clientData,
171 aeEventFinalizerProc *finalizerProc)
172 {
173 long long id = eventLoop->timeEventNextId++;
174 aeTimeEvent *te;
175
176 te = zmalloc(sizeof(*te));
177 if (te == NULL) return AE_ERR;
178 te->id = id;
179 aeAddMillisecondsToNow(milliseconds,&te->when_sec,&te->when_ms);
180 te->timeProc = proc;
181 te->finalizerProc = finalizerProc;
182 te->clientData = clientData;
183 te->next = eventLoop->timeEventHead;
184 eventLoop->timeEventHead = te;
185 return id;
186 }
187
188 int aeDeleteTimeEvent(aeEventLoop *eventLoop, long long id)
189 {
190 aeTimeEvent *te, *prev = NULL;
191
192 te = eventLoop->timeEventHead;
193 while(te) {
194 if (te->id == id) {
195 if (prev == NULL)
196 eventLoop->timeEventHead = te->next;
197 else
198 prev->next = te->next;
199 if (te->finalizerProc)
200 te->finalizerProc(eventLoop, te->clientData);
201 zfree(te);
202 return AE_OK;
203 }
204 prev = te;
205 te = te->next;
206 }
207 return AE_ERR; /* NO event with the specified ID found */
208 }
209
210 /* Search the first timer to fire.
211 * This operation is useful to know how many time the select can be
212 * put in sleep without to delay any event.
213 * If there are no timers NULL is returned.
214 *
215 * Note that's O(N) since time events are unsorted.
216 * Possible optimizations (not needed by Redis so far, but...):
217 * 1) Insert the event in order, so that the nearest is just the head.
218 * Much better but still insertion or deletion of timers is O(N).
219 * 2) Use a skiplist to have this operation as O(1) and insertion as O(log(N)).
220 */
221 static aeTimeEvent *aeSearchNearestTimer(aeEventLoop *eventLoop)
222 {
223 aeTimeEvent *te = eventLoop->timeEventHead;
224 aeTimeEvent *nearest = NULL;
225
226 while(te) {
227 if (!nearest || te->when_sec < nearest->when_sec ||
228 (te->when_sec == nearest->when_sec &&
229 te->when_ms < nearest->when_ms))
230 nearest = te;
231 te = te->next;
232 }
233 return nearest;
234 }
235
236 /* Process time events */
237 static int processTimeEvents(aeEventLoop *eventLoop) {
238 int processed = 0;
239 aeTimeEvent *te;
240 long long maxId;
241 time_t now = time(NULL);
242
243 /* If the system clock is moved to the future, and then set back to the
244 * right value, time events may be delayed in a random way. Often this
245 * means that scheduled operations will not be performed soon enough.
246 *
247 * Here we try to detect system clock skews, and force all the time
248 * events to be processed ASAP when this happens: the idea is that
249 * processing events earlier is less dangerous than delaying them
250 * indefinitely, and practice suggests it is. */
251 if (now < eventLoop->lastTime) {
252 te = eventLoop->timeEventHead;
253 while(te) {
254 te->when_sec = 0;
255 te = te->next;
256 }
257 }
258 eventLoop->lastTime = now;
259
260 te = eventLoop->timeEventHead;
261 maxId = eventLoop->timeEventNextId-1;
262 while(te) {
263 long now_sec, now_ms;
264 long long id;
265
266 if (te->id > maxId) {
267 te = te->next;
268 continue;
269 }
270 aeGetTime(&now_sec, &now_ms);
271 if (now_sec > te->when_sec ||
272 (now_sec == te->when_sec && now_ms >= te->when_ms))
273 {
274 int retval;
275
276 id = te->id;
277 retval = te->timeProc(eventLoop, id, te->clientData);
278 processed++;
279 /* After an event is processed our time event list may
280 * no longer be the same, so we restart from head.
281 * Still we make sure to don't process events registered
282 * by event handlers itself in order to don't loop forever.
283 * To do so we saved the max ID we want to handle.
284 *
285 * FUTURE OPTIMIZATIONS:
286 * Note that this is NOT great algorithmically. Redis uses
287 * a single time event so it's not a problem but the right
288 * way to do this is to add the new elements on head, and
289 * to flag deleted elements in a special way for later
290 * deletion (putting references to the nodes to delete into
291 * another linked list). */
292 if (retval != AE_NOMORE) {
293 aeAddMillisecondsToNow(retval,&te->when_sec,&te->when_ms);
294 } else {
295 aeDeleteTimeEvent(eventLoop, id);
296 }
297 te = eventLoop->timeEventHead;
298 } else {
299 te = te->next;
300 }
301 }
302 return processed;
303 }
304
305 /* Process every pending time event, then every pending file event
306 * (that may be registered by time event callbacks just processed).
307 * Without special flags the function sleeps until some file event
308 * fires, or when the next time event occurrs (if any).
309 *
310 * If flags is 0, the function does nothing and returns.
311 * if flags has AE_ALL_EVENTS set, all the kind of events are processed.
312 * if flags has AE_FILE_EVENTS set, file events are processed.
313 * if flags has AE_TIME_EVENTS set, time events are processed.
314 * if flags has AE_DONT_WAIT set the function returns ASAP until all
315 * the events that's possible to process without to wait are processed.
316 *
317 * The function returns the number of events processed. */
318 int aeProcessEvents(aeEventLoop *eventLoop, int flags)
319 {
320 int processed = 0, numevents;
321
322 /* Nothing to do? return ASAP */
323 if (!(flags & AE_TIME_EVENTS) && !(flags & AE_FILE_EVENTS)) return 0;
324
325 /* Note that we want call select() even if there are no
326 * file events to process as long as we want to process time
327 * events, in order to sleep until the next time event is ready
328 * to fire. */
329 if (eventLoop->maxfd != -1 ||
330 ((flags & AE_TIME_EVENTS) && !(flags & AE_DONT_WAIT))) {
331 int j;
332 aeTimeEvent *shortest = NULL;
333 struct timeval tv, *tvp;
334
335 if (flags & AE_TIME_EVENTS && !(flags & AE_DONT_WAIT))
336 shortest = aeSearchNearestTimer(eventLoop);
337 if (shortest) {
338 long now_sec, now_ms;
339
340 /* Calculate the time missing for the nearest
341 * timer to fire. */
342 aeGetTime(&now_sec, &now_ms);
343 tvp = &tv;
344 tvp->tv_sec = shortest->when_sec - now_sec;
345 if (shortest->when_ms < now_ms) {
346 tvp->tv_usec = ((shortest->when_ms+1000) - now_ms)*1000;
347 tvp->tv_sec --;
348 } else {
349 tvp->tv_usec = (shortest->when_ms - now_ms)*1000;
350 }
351 if (tvp->tv_sec < 0) tvp->tv_sec = 0;
352 if (tvp->tv_usec < 0) tvp->tv_usec = 0;
353 } else {
354 /* If we have to check for events but need to return
355 * ASAP because of AE_DONT_WAIT we need to se the timeout
356 * to zero */
357 if (flags & AE_DONT_WAIT) {
358 tv.tv_sec = tv.tv_usec = 0;
359 tvp = &tv;
360 } else {
361 /* Otherwise we can block */
362 tvp = NULL; /* wait forever */
363 }
364 }
365
366 numevents = aeApiPoll(eventLoop, tvp);
367 for (j = 0; j < numevents; j++) {
368 aeFileEvent *fe = &eventLoop->events[eventLoop->fired[j].fd];
369 int mask = eventLoop->fired[j].mask;
370 int fd = eventLoop->fired[j].fd;
371 int rfired = 0;
372
373 /* note the fe->mask & mask & ... code: maybe an already processed
374 * event removed an element that fired and we still didn't
375 * processed, so we check if the event is still valid. */
376 if (fe->mask & mask & AE_READABLE) {
377 rfired = 1;
378 fe->rfileProc(eventLoop,fd,fe->clientData,mask);
379 }
380 if (fe->mask & mask & AE_WRITABLE) {
381 if (!rfired || fe->wfileProc != fe->rfileProc)
382 fe->wfileProc(eventLoop,fd,fe->clientData,mask);
383 }
384 processed++;
385 }
386 }
387 /* Check time events */
388 if (flags & AE_TIME_EVENTS)
389 processed += processTimeEvents(eventLoop);
390
391 return processed; /* return the number of processed file/time events */
392 }
393
394 /* Wait for millseconds until the given file descriptor becomes
395 * writable/readable/exception */
396 int aeWait(int fd, int mask, long long milliseconds) {
397 struct pollfd pfd;
398 int retmask = 0, retval;
399
400 memset(&pfd, 0, sizeof(pfd));
401 pfd.fd = fd;
402 if (mask & AE_READABLE) pfd.events |= POLLIN;
403 if (mask & AE_WRITABLE) pfd.events |= POLLOUT;
404
405 if ((retval = poll(&pfd, 1, milliseconds))== 1) {
406 if (pfd.revents & POLLIN) retmask |= AE_READABLE;
407 if (pfd.revents & POLLOUT) retmask |= AE_WRITABLE;
408 if (pfd.revents & POLLERR) retmask |= AE_WRITABLE;
409 if (pfd.revents & POLLHUP) retmask |= AE_WRITABLE;
410 return retmask;
411 } else {
412 return retval;
413 }
414 }
415
416 void aeMain(aeEventLoop *eventLoop) {
417 eventLoop->stop = 0;
418 while (!eventLoop->stop) {
419 if (eventLoop->beforesleep != NULL)
420 eventLoop->beforesleep(eventLoop);
421 aeProcessEvents(eventLoop, AE_ALL_EVENTS);
422 }
423 }
424
425 char *aeGetApiName(void) {
426 return aeApiName();
427 }
428
429 void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep) {
430 eventLoop->beforesleep = beforesleep;
431 }