]>
Commit | Line | Data |
---|---|---|
1 | /*- | |
2 | * Copyright (c) 1990, 1993, 1994 | |
3 | * The Regents of the University of California. All rights reserved. | |
4 | * | |
5 | * This code is derived from software contributed to Berkeley by | |
6 | * Mike Olson. | |
7 | * | |
8 | * Redistribution and use in source and binary forms, with or without | |
9 | * modification, are permitted provided that the following conditions | |
10 | * are met: | |
11 | * 1. Redistributions of source code must retain the above copyright | |
12 | * notice, this list of conditions and the following disclaimer. | |
13 | * 2. Redistributions in binary form must reproduce the above copyright | |
14 | * notice, this list of conditions and the following disclaimer in the | |
15 | * documentation and/or other materials provided with the distribution. | |
16 | * 4. Neither the name of the University nor the names of its contributors | |
17 | * may be used to endorse or promote products derived from this software | |
18 | * without specific prior written permission. | |
19 | * | |
20 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
21 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
23 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
26 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
27 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
28 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
29 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
30 | * SUCH DAMAGE. | |
31 | */ | |
32 | ||
33 | #if defined(LIBC_SCCS) && !defined(lint) | |
34 | static char sccsid[] = "@(#)bt_seq.c 8.7 (Berkeley) 7/20/94"; | |
35 | #endif /* LIBC_SCCS and not lint */ | |
36 | #include <sys/cdefs.h> | |
37 | __FBSDID("$FreeBSD: src/lib/libc/db/btree/bt_seq.c,v 1.7 2009/03/04 00:58:04 delphij Exp $"); | |
38 | ||
39 | #include <sys/types.h> | |
40 | ||
41 | #include <errno.h> | |
42 | #include <stddef.h> | |
43 | #include <stdio.h> | |
44 | #include <stdlib.h> | |
45 | ||
46 | #include <db.h> | |
47 | #include "btree.h" | |
48 | ||
49 | static int __bt_first(BTREE *, const DBT *, EPG *, int *); | |
50 | static int __bt_seqadv(BTREE *, EPG *, int); | |
51 | static int __bt_seqset(BTREE *, EPG *, DBT *, int); | |
52 | ||
53 | /* | |
54 | * Sequential scan support. | |
55 | * | |
56 | * The tree can be scanned sequentially, starting from either end of the | |
57 | * tree or from any specific key. A scan request before any scanning is | |
58 | * done is initialized as starting from the least node. | |
59 | */ | |
60 | ||
61 | /* | |
62 | * __bt_seq -- | |
63 | * Btree sequential scan interface. | |
64 | * | |
65 | * Parameters: | |
66 | * dbp: pointer to access method | |
67 | * key: key for positioning and return value | |
68 | * data: data return value | |
69 | * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. | |
70 | * | |
71 | * Returns: | |
72 | * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. | |
73 | */ | |
74 | int | |
75 | __bt_seq(const DB *dbp, DBT *key, DBT *data, u_int flags) | |
76 | { | |
77 | BTREE *t; | |
78 | EPG e; | |
79 | int status; | |
80 | ||
81 | t = dbp->internal; | |
82 | ||
83 | /* Toss any page pinned across calls. */ | |
84 | if (t->bt_pinned != NULL) { | |
85 | mpool_put(t->bt_mp, t->bt_pinned, 0); | |
86 | t->bt_pinned = NULL; | |
87 | } | |
88 | ||
89 | /* | |
90 | * If scan unitialized as yet, or starting at a specific record, set | |
91 | * the scan to a specific key. Both __bt_seqset and __bt_seqadv pin | |
92 | * the page the cursor references if they're successful. | |
93 | */ | |
94 | switch (flags) { | |
95 | case R_NEXT: | |
96 | case R_PREV: | |
97 | if (F_ISSET(&t->bt_cursor, CURS_INIT)) { | |
98 | status = __bt_seqadv(t, &e, flags); | |
99 | break; | |
100 | } | |
101 | /* FALLTHROUGH */ | |
102 | case R_FIRST: | |
103 | case R_LAST: | |
104 | case R_CURSOR: | |
105 | status = __bt_seqset(t, &e, key, flags); | |
106 | break; | |
107 | default: | |
108 | errno = EINVAL; | |
109 | return (RET_ERROR); | |
110 | } | |
111 | ||
112 | if (status == RET_SUCCESS) { | |
113 | __bt_setcur(t, e.page->pgno, e.index); | |
114 | ||
115 | status = | |
116 | __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0); | |
117 | ||
118 | /* | |
119 | * If the user is doing concurrent access, we copied the | |
120 | * key/data, toss the page. | |
121 | */ | |
122 | if (F_ISSET(t, B_DB_LOCK)) | |
123 | mpool_put(t->bt_mp, e.page, 0); | |
124 | else | |
125 | t->bt_pinned = e.page; | |
126 | } | |
127 | return (status); | |
128 | } | |
129 | ||
130 | /* | |
131 | * __bt_seqset -- | |
132 | * Set the sequential scan to a specific key. | |
133 | * | |
134 | * Parameters: | |
135 | * t: tree | |
136 | * ep: storage for returned key | |
137 | * key: key for initial scan position | |
138 | * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV | |
139 | * | |
140 | * Side effects: | |
141 | * Pins the page the cursor references. | |
142 | * | |
143 | * Returns: | |
144 | * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. | |
145 | */ | |
146 | static int | |
147 | __bt_seqset(BTREE *t, EPG *ep, DBT *key, int flags) | |
148 | { | |
149 | PAGE *h; | |
150 | pgno_t pg; | |
151 | int exact; | |
152 | ||
153 | /* | |
154 | * Find the first, last or specific key in the tree and point the | |
155 | * cursor at it. The cursor may not be moved until a new key has | |
156 | * been found. | |
157 | */ | |
158 | switch (flags) { | |
159 | case R_CURSOR: /* Keyed scan. */ | |
160 | /* | |
161 | * Find the first instance of the key or the smallest key | |
162 | * which is greater than or equal to the specified key. | |
163 | */ | |
164 | if (key->data == NULL || key->size == 0) { | |
165 | errno = EINVAL; | |
166 | return (RET_ERROR); | |
167 | } | |
168 | return (__bt_first(t, key, ep, &exact)); | |
169 | case R_FIRST: /* First record. */ | |
170 | case R_NEXT: | |
171 | /* Walk down the left-hand side of the tree. */ | |
172 | for (pg = P_ROOT;;) { | |
173 | if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) | |
174 | return (RET_ERROR); | |
175 | ||
176 | /* Check for an empty tree. */ | |
177 | if (NEXTINDEX(h) == 0) { | |
178 | mpool_put(t->bt_mp, h, 0); | |
179 | return (RET_SPECIAL); | |
180 | } | |
181 | ||
182 | if (h->flags & (P_BLEAF | P_RLEAF)) | |
183 | break; | |
184 | pg = GETBINTERNAL(h, 0)->pgno; | |
185 | mpool_put(t->bt_mp, h, 0); | |
186 | } | |
187 | ep->page = h; | |
188 | ep->index = 0; | |
189 | break; | |
190 | case R_LAST: /* Last record. */ | |
191 | case R_PREV: | |
192 | /* Walk down the right-hand side of the tree. */ | |
193 | for (pg = P_ROOT;;) { | |
194 | if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) | |
195 | return (RET_ERROR); | |
196 | ||
197 | /* Check for an empty tree. */ | |
198 | if (NEXTINDEX(h) == 0) { | |
199 | mpool_put(t->bt_mp, h, 0); | |
200 | return (RET_SPECIAL); | |
201 | } | |
202 | ||
203 | if (h->flags & (P_BLEAF | P_RLEAF)) | |
204 | break; | |
205 | pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; | |
206 | mpool_put(t->bt_mp, h, 0); | |
207 | } | |
208 | ||
209 | ep->page = h; | |
210 | ep->index = NEXTINDEX(h) - 1; | |
211 | break; | |
212 | } | |
213 | return (RET_SUCCESS); | |
214 | } | |
215 | ||
216 | /* | |
217 | * __bt_seqadvance -- | |
218 | * Advance the sequential scan. | |
219 | * | |
220 | * Parameters: | |
221 | * t: tree | |
222 | * flags: R_NEXT, R_PREV | |
223 | * | |
224 | * Side effects: | |
225 | * Pins the page the new key/data record is on. | |
226 | * | |
227 | * Returns: | |
228 | * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. | |
229 | */ | |
230 | static int | |
231 | __bt_seqadv(BTREE *t, EPG *ep, int flags) | |
232 | { | |
233 | CURSOR *c; | |
234 | PAGE *h; | |
235 | indx_t idx; | |
236 | pgno_t pg; | |
237 | int exact; | |
238 | ||
239 | /* | |
240 | * There are a couple of states that we can be in. The cursor has | |
241 | * been initialized by the time we get here, but that's all we know. | |
242 | */ | |
243 | c = &t->bt_cursor; | |
244 | ||
245 | /* | |
246 | * The cursor was deleted where there weren't any duplicate records, | |
247 | * so the key was saved. Find out where that key would go in the | |
248 | * current tree. It doesn't matter if the returned key is an exact | |
249 | * match or not -- if it's an exact match, the record was added after | |
250 | * the delete so we can just return it. If not, as long as there's | |
251 | * a record there, return it. | |
252 | */ | |
253 | if (F_ISSET(c, CURS_ACQUIRE)) | |
254 | return (__bt_first(t, &c->key, ep, &exact)); | |
255 | ||
256 | /* Get the page referenced by the cursor. */ | |
257 | if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) | |
258 | return (RET_ERROR); | |
259 | ||
260 | /* | |
261 | * Find the next/previous record in the tree and point the cursor at | |
262 | * it. The cursor may not be moved until a new key has been found. | |
263 | */ | |
264 | switch (flags) { | |
265 | case R_NEXT: /* Next record. */ | |
266 | /* | |
267 | * The cursor was deleted in duplicate records, and moved | |
268 | * forward to a record that has yet to be returned. Clear | |
269 | * that flag, and return the record. | |
270 | */ | |
271 | if (F_ISSET(c, CURS_AFTER)) | |
272 | goto usecurrent; | |
273 | idx = c->pg.index; | |
274 | if (++idx == NEXTINDEX(h)) { | |
275 | pg = h->nextpg; | |
276 | mpool_put(t->bt_mp, h, 0); | |
277 | if (pg == P_INVALID) | |
278 | return (RET_SPECIAL); | |
279 | if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) | |
280 | return (RET_ERROR); | |
281 | idx = 0; | |
282 | } | |
283 | break; | |
284 | case R_PREV: /* Previous record. */ | |
285 | /* | |
286 | * The cursor was deleted in duplicate records, and moved | |
287 | * backward to a record that has yet to be returned. Clear | |
288 | * that flag, and return the record. | |
289 | */ | |
290 | if (F_ISSET(c, CURS_BEFORE)) { | |
291 | usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE); | |
292 | ep->page = h; | |
293 | ep->index = c->pg.index; | |
294 | return (RET_SUCCESS); | |
295 | } | |
296 | idx = c->pg.index; | |
297 | if (idx == 0) { | |
298 | pg = h->prevpg; | |
299 | mpool_put(t->bt_mp, h, 0); | |
300 | if (pg == P_INVALID) | |
301 | return (RET_SPECIAL); | |
302 | if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) | |
303 | return (RET_ERROR); | |
304 | idx = NEXTINDEX(h) - 1; | |
305 | } else | |
306 | --idx; | |
307 | break; | |
308 | } | |
309 | ||
310 | ep->page = h; | |
311 | ep->index = idx; | |
312 | return (RET_SUCCESS); | |
313 | } | |
314 | ||
315 | /* | |
316 | * __bt_first -- | |
317 | * Find the first entry. | |
318 | * | |
319 | * Parameters: | |
320 | * t: the tree | |
321 | * key: the key | |
322 | * erval: return EPG | |
323 | * exactp: pointer to exact match flag | |
324 | * | |
325 | * Returns: | |
326 | * The first entry in the tree greater than or equal to key, | |
327 | * or RET_SPECIAL if no such key exists. | |
328 | */ | |
329 | static int | |
330 | __bt_first(BTREE *t, const DBT *key, EPG *erval, int *exactp) | |
331 | { | |
332 | PAGE *h; | |
333 | EPG *ep, save; | |
334 | pgno_t pg; | |
335 | ||
336 | /* | |
337 | * Find any matching record; __bt_search pins the page. | |
338 | * | |
339 | * If it's an exact match and duplicates are possible, walk backwards | |
340 | * in the tree until we find the first one. Otherwise, make sure it's | |
341 | * a valid key (__bt_search may return an index just past the end of a | |
342 | * page) and return it. | |
343 | */ | |
344 | if ((ep = __bt_search(t, key, exactp)) == NULL) | |
345 | return (RET_SPECIAL); | |
346 | if (*exactp) { | |
347 | if (F_ISSET(t, B_NODUPS)) { | |
348 | *erval = *ep; | |
349 | return (RET_SUCCESS); | |
350 | } | |
351 | ||
352 | /* | |
353 | * Walk backwards, as long as the entry matches and there are | |
354 | * keys left in the tree. Save a copy of each match in case | |
355 | * we go too far. | |
356 | */ | |
357 | save = *ep; | |
358 | h = ep->page; | |
359 | do { | |
360 | if (save.page->pgno != ep->page->pgno) { | |
361 | mpool_put(t->bt_mp, save.page, 0); | |
362 | save = *ep; | |
363 | } else | |
364 | save.index = ep->index; | |
365 | ||
366 | /* | |
367 | * Don't unpin the page the last (or original) match | |
368 | * was on, but make sure it's unpinned if an error | |
369 | * occurs. | |
370 | */ | |
371 | if (ep->index == 0) { | |
372 | PAGE *hprev; | |
373 | if (h->prevpg == P_INVALID) | |
374 | break; | |
375 | if (h->pgno != save.page->pgno) | |
376 | mpool_put(t->bt_mp, h, 0); | |
377 | if ((hprev = mpool_get(t->bt_mp, | |
378 | h->prevpg, 0)) == NULL) { | |
379 | if (h->pgno == save.page->pgno) | |
380 | mpool_put(t->bt_mp, | |
381 | save.page, 0); | |
382 | return (RET_ERROR); | |
383 | } | |
384 | ep->page = h = hprev; | |
385 | ep->index = NEXTINDEX(h); | |
386 | } | |
387 | --ep->index; | |
388 | } while (__bt_cmp(t, key, ep) == 0); | |
389 | ||
390 | /* | |
391 | * Reach here with the last page that was looked at pinned, | |
392 | * which may or may not be the same as the last (or original) | |
393 | * match page. If it's not useful, release it. | |
394 | */ | |
395 | if (h->pgno != save.page->pgno) | |
396 | mpool_put(t->bt_mp, h, 0); | |
397 | ||
398 | *erval = save; | |
399 | return (RET_SUCCESS); | |
400 | } | |
401 | ||
402 | /* If at the end of a page, find the next entry. */ | |
403 | if (ep->index == NEXTINDEX(ep->page)) { | |
404 | h = ep->page; | |
405 | pg = h->nextpg; | |
406 | mpool_put(t->bt_mp, h, 0); | |
407 | if (pg == P_INVALID) | |
408 | return (RET_SPECIAL); | |
409 | if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) | |
410 | return (RET_ERROR); | |
411 | ep->index = 0; | |
412 | ep->page = h; | |
413 | } | |
414 | *erval = *ep; | |
415 | return (RET_SUCCESS); | |
416 | } | |
417 | ||
418 | /* | |
419 | * __bt_setcur -- | |
420 | * Set the cursor to an entry in the tree. | |
421 | * | |
422 | * Parameters: | |
423 | * t: the tree | |
424 | * pgno: page number | |
425 | * idx: page index | |
426 | */ | |
427 | void | |
428 | __bt_setcur(BTREE *t, pgno_t pgno, u_int idx) | |
429 | { | |
430 | /* Lose any already deleted key. */ | |
431 | if (t->bt_cursor.key.data != NULL) { | |
432 | free(t->bt_cursor.key.data); | |
433 | t->bt_cursor.key.size = 0; | |
434 | t->bt_cursor.key.data = NULL; | |
435 | } | |
436 | F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE); | |
437 | ||
438 | /* Update the cursor. */ | |
439 | t->bt_cursor.pg.pgno = pgno; | |
440 | t->bt_cursor.pg.index = idx; | |
441 | F_SET(&t->bt_cursor, CURS_INIT); | |
442 | } |