]>
Commit | Line | Data |
---|---|---|
9385eb3d A |
1 | /*- |
2 | * Copyright (c) 1990, 1993, 1994 | |
e9ce8d39 A |
3 | * The Regents of the University of California. All rights reserved. |
4 | * | |
5 | * This code is derived from software contributed to Berkeley by | |
6 | * Mike Olson. | |
7 | * | |
8 | * Redistribution and use in source and binary forms, with or without | |
9 | * modification, are permitted provided that the following conditions | |
10 | * are met: | |
11 | * 1. Redistributions of source code must retain the above copyright | |
12 | * notice, this list of conditions and the following disclaimer. | |
13 | * 2. Redistributions in binary form must reproduce the above copyright | |
14 | * notice, this list of conditions and the following disclaimer in the | |
15 | * documentation and/or other materials provided with the distribution. | |
e9ce8d39 A |
16 | * 4. Neither the name of the University nor the names of its contributors |
17 | * may be used to endorse or promote products derived from this software | |
18 | * without specific prior written permission. | |
19 | * | |
20 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
21 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
23 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
26 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
27 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
28 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
29 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
30 | * SUCH DAMAGE. | |
31 | */ | |
32 | ||
9385eb3d A |
33 | #if defined(LIBC_SCCS) && !defined(lint) |
34 | static char sccsid[] = "@(#)bt_seq.c 8.7 (Berkeley) 7/20/94"; | |
35 | #endif /* LIBC_SCCS and not lint */ | |
36 | #include <sys/cdefs.h> | |
1f2f436a | 37 | __FBSDID("$FreeBSD: src/lib/libc/db/btree/bt_seq.c,v 1.7 2009/03/04 00:58:04 delphij Exp $"); |
e9ce8d39 A |
38 | |
39 | #include <sys/types.h> | |
40 | ||
41 | #include <errno.h> | |
42 | #include <stddef.h> | |
43 | #include <stdio.h> | |
44 | #include <stdlib.h> | |
45 | ||
46 | #include <db.h> | |
47 | #include "btree.h" | |
48 | ||
9385eb3d A |
49 | static int __bt_first(BTREE *, const DBT *, EPG *, int *); |
50 | static int __bt_seqadv(BTREE *, EPG *, int); | |
51 | static int __bt_seqset(BTREE *, EPG *, DBT *, int); | |
e9ce8d39 A |
52 | |
53 | /* | |
54 | * Sequential scan support. | |
55 | * | |
9385eb3d A |
56 | * The tree can be scanned sequentially, starting from either end of the |
57 | * tree or from any specific key. A scan request before any scanning is | |
58 | * done is initialized as starting from the least node. | |
e9ce8d39 A |
59 | */ |
60 | ||
61 | /* | |
9385eb3d A |
62 | * __bt_seq -- |
63 | * Btree sequential scan interface. | |
e9ce8d39 A |
64 | * |
65 | * Parameters: | |
66 | * dbp: pointer to access method | |
67 | * key: key for positioning and return value | |
68 | * data: data return value | |
69 | * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. | |
70 | * | |
71 | * Returns: | |
72 | * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. | |
73 | */ | |
74 | int | |
1f2f436a | 75 | __bt_seq(const DB *dbp, DBT *key, DBT *data, u_int flags) |
e9ce8d39 A |
76 | { |
77 | BTREE *t; | |
78 | EPG e; | |
79 | int status; | |
80 | ||
81 | t = dbp->internal; | |
82 | ||
83 | /* Toss any page pinned across calls. */ | |
84 | if (t->bt_pinned != NULL) { | |
85 | mpool_put(t->bt_mp, t->bt_pinned, 0); | |
86 | t->bt_pinned = NULL; | |
87 | } | |
88 | ||
89 | /* | |
90 | * If scan unitialized as yet, or starting at a specific record, set | |
9385eb3d A |
91 | * the scan to a specific key. Both __bt_seqset and __bt_seqadv pin |
92 | * the page the cursor references if they're successful. | |
e9ce8d39 | 93 | */ |
9385eb3d | 94 | switch (flags) { |
e9ce8d39 A |
95 | case R_NEXT: |
96 | case R_PREV: | |
9385eb3d A |
97 | if (F_ISSET(&t->bt_cursor, CURS_INIT)) { |
98 | status = __bt_seqadv(t, &e, flags); | |
e9ce8d39 A |
99 | break; |
100 | } | |
101 | /* FALLTHROUGH */ | |
e9ce8d39 A |
102 | case R_FIRST: |
103 | case R_LAST: | |
9385eb3d A |
104 | case R_CURSOR: |
105 | status = __bt_seqset(t, &e, key, flags); | |
e9ce8d39 A |
106 | break; |
107 | default: | |
108 | errno = EINVAL; | |
109 | return (RET_ERROR); | |
110 | } | |
111 | ||
112 | if (status == RET_SUCCESS) { | |
9385eb3d | 113 | __bt_setcur(t, e.page->pgno, e.index); |
e9ce8d39 | 114 | |
9385eb3d A |
115 | status = |
116 | __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0); | |
e9ce8d39 A |
117 | |
118 | /* | |
119 | * If the user is doing concurrent access, we copied the | |
120 | * key/data, toss the page. | |
121 | */ | |
9385eb3d | 122 | if (F_ISSET(t, B_DB_LOCK)) |
e9ce8d39 A |
123 | mpool_put(t->bt_mp, e.page, 0); |
124 | else | |
125 | t->bt_pinned = e.page; | |
e9ce8d39 A |
126 | } |
127 | return (status); | |
128 | } | |
129 | ||
130 | /* | |
9385eb3d A |
131 | * __bt_seqset -- |
132 | * Set the sequential scan to a specific key. | |
e9ce8d39 A |
133 | * |
134 | * Parameters: | |
135 | * t: tree | |
136 | * ep: storage for returned key | |
137 | * key: key for initial scan position | |
138 | * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV | |
139 | * | |
140 | * Side effects: | |
141 | * Pins the page the cursor references. | |
142 | * | |
143 | * Returns: | |
144 | * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. | |
145 | */ | |
146 | static int | |
1f2f436a | 147 | __bt_seqset(BTREE *t, EPG *ep, DBT *key, int flags) |
e9ce8d39 | 148 | { |
e9ce8d39 A |
149 | PAGE *h; |
150 | pgno_t pg; | |
151 | int exact; | |
152 | ||
153 | /* | |
9385eb3d A |
154 | * Find the first, last or specific key in the tree and point the |
155 | * cursor at it. The cursor may not be moved until a new key has | |
156 | * been found. | |
e9ce8d39 | 157 | */ |
9385eb3d | 158 | switch (flags) { |
e9ce8d39 A |
159 | case R_CURSOR: /* Keyed scan. */ |
160 | /* | |
9385eb3d A |
161 | * Find the first instance of the key or the smallest key |
162 | * which is greater than or equal to the specified key. | |
e9ce8d39 A |
163 | */ |
164 | if (key->data == NULL || key->size == 0) { | |
165 | errno = EINVAL; | |
166 | return (RET_ERROR); | |
167 | } | |
9385eb3d | 168 | return (__bt_first(t, key, ep, &exact)); |
e9ce8d39 A |
169 | case R_FIRST: /* First record. */ |
170 | case R_NEXT: | |
171 | /* Walk down the left-hand side of the tree. */ | |
172 | for (pg = P_ROOT;;) { | |
173 | if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) | |
174 | return (RET_ERROR); | |
9385eb3d A |
175 | |
176 | /* Check for an empty tree. */ | |
177 | if (NEXTINDEX(h) == 0) { | |
178 | mpool_put(t->bt_mp, h, 0); | |
179 | return (RET_SPECIAL); | |
180 | } | |
181 | ||
e9ce8d39 A |
182 | if (h->flags & (P_BLEAF | P_RLEAF)) |
183 | break; | |
184 | pg = GETBINTERNAL(h, 0)->pgno; | |
185 | mpool_put(t->bt_mp, h, 0); | |
186 | } | |
e9ce8d39 A |
187 | ep->page = h; |
188 | ep->index = 0; | |
189 | break; | |
190 | case R_LAST: /* Last record. */ | |
191 | case R_PREV: | |
192 | /* Walk down the right-hand side of the tree. */ | |
193 | for (pg = P_ROOT;;) { | |
194 | if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) | |
195 | return (RET_ERROR); | |
9385eb3d A |
196 | |
197 | /* Check for an empty tree. */ | |
198 | if (NEXTINDEX(h) == 0) { | |
199 | mpool_put(t->bt_mp, h, 0); | |
200 | return (RET_SPECIAL); | |
201 | } | |
202 | ||
e9ce8d39 A |
203 | if (h->flags & (P_BLEAF | P_RLEAF)) |
204 | break; | |
205 | pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; | |
206 | mpool_put(t->bt_mp, h, 0); | |
207 | } | |
208 | ||
e9ce8d39 A |
209 | ep->page = h; |
210 | ep->index = NEXTINDEX(h) - 1; | |
211 | break; | |
212 | } | |
213 | return (RET_SUCCESS); | |
214 | } | |
215 | ||
216 | /* | |
9385eb3d A |
217 | * __bt_seqadvance -- |
218 | * Advance the sequential scan. | |
e9ce8d39 A |
219 | * |
220 | * Parameters: | |
221 | * t: tree | |
222 | * flags: R_NEXT, R_PREV | |
223 | * | |
224 | * Side effects: | |
225 | * Pins the page the new key/data record is on. | |
226 | * | |
227 | * Returns: | |
228 | * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. | |
229 | */ | |
230 | static int | |
1f2f436a | 231 | __bt_seqadv(BTREE *t, EPG *ep, int flags) |
e9ce8d39 | 232 | { |
9385eb3d | 233 | CURSOR *c; |
e9ce8d39 | 234 | PAGE *h; |
1f2f436a | 235 | indx_t idx; |
e9ce8d39 | 236 | pgno_t pg; |
9385eb3d | 237 | int exact; |
e9ce8d39 | 238 | |
9385eb3d A |
239 | /* |
240 | * There are a couple of states that we can be in. The cursor has | |
241 | * been initialized by the time we get here, but that's all we know. | |
242 | */ | |
243 | c = &t->bt_cursor; | |
e9ce8d39 | 244 | |
9385eb3d A |
245 | /* |
246 | * The cursor was deleted where there weren't any duplicate records, | |
247 | * so the key was saved. Find out where that key would go in the | |
248 | * current tree. It doesn't matter if the returned key is an exact | |
249 | * match or not -- if it's an exact match, the record was added after | |
250 | * the delete so we can just return it. If not, as long as there's | |
251 | * a record there, return it. | |
252 | */ | |
253 | if (F_ISSET(c, CURS_ACQUIRE)) | |
254 | return (__bt_first(t, &c->key, ep, &exact)); | |
255 | ||
256 | /* Get the page referenced by the cursor. */ | |
257 | if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) | |
e9ce8d39 A |
258 | return (RET_ERROR); |
259 | ||
260 | /* | |
1f2f436a | 261 | * Find the next/previous record in the tree and point the cursor at |
9385eb3d | 262 | * it. The cursor may not be moved until a new key has been found. |
e9ce8d39 | 263 | */ |
9385eb3d | 264 | switch (flags) { |
e9ce8d39 | 265 | case R_NEXT: /* Next record. */ |
9385eb3d A |
266 | /* |
267 | * The cursor was deleted in duplicate records, and moved | |
268 | * forward to a record that has yet to be returned. Clear | |
269 | * that flag, and return the record. | |
270 | */ | |
271 | if (F_ISSET(c, CURS_AFTER)) | |
272 | goto usecurrent; | |
1f2f436a A |
273 | idx = c->pg.index; |
274 | if (++idx == NEXTINDEX(h)) { | |
9385eb3d A |
275 | pg = h->nextpg; |
276 | mpool_put(t->bt_mp, h, 0); | |
277 | if (pg == P_INVALID) | |
278 | return (RET_SPECIAL); | |
279 | if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) | |
280 | return (RET_ERROR); | |
1f2f436a | 281 | idx = 0; |
e9ce8d39 A |
282 | } |
283 | break; | |
284 | case R_PREV: /* Previous record. */ | |
9385eb3d A |
285 | /* |
286 | * The cursor was deleted in duplicate records, and moved | |
287 | * backward to a record that has yet to be returned. Clear | |
288 | * that flag, and return the record. | |
289 | */ | |
290 | if (F_ISSET(c, CURS_BEFORE)) { | |
291 | usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE); | |
292 | ep->page = h; | |
293 | ep->index = c->pg.index; | |
294 | return (RET_SUCCESS); | |
e9ce8d39 | 295 | } |
1f2f436a A |
296 | idx = c->pg.index; |
297 | if (idx == 0) { | |
9385eb3d A |
298 | pg = h->prevpg; |
299 | mpool_put(t->bt_mp, h, 0); | |
300 | if (pg == P_INVALID) | |
301 | return (RET_SPECIAL); | |
302 | if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) | |
303 | return (RET_ERROR); | |
1f2f436a | 304 | idx = NEXTINDEX(h) - 1; |
9385eb3d | 305 | } else |
1f2f436a | 306 | --idx; |
e9ce8d39 A |
307 | break; |
308 | } | |
309 | ||
9385eb3d | 310 | ep->page = h; |
1f2f436a | 311 | ep->index = idx; |
9385eb3d A |
312 | return (RET_SUCCESS); |
313 | } | |
314 | ||
315 | /* | |
316 | * __bt_first -- | |
317 | * Find the first entry. | |
318 | * | |
319 | * Parameters: | |
320 | * t: the tree | |
321 | * key: the key | |
322 | * erval: return EPG | |
323 | * exactp: pointer to exact match flag | |
324 | * | |
325 | * Returns: | |
326 | * The first entry in the tree greater than or equal to key, | |
327 | * or RET_SPECIAL if no such key exists. | |
328 | */ | |
329 | static int | |
1f2f436a | 330 | __bt_first(BTREE *t, const DBT *key, EPG *erval, int *exactp) |
9385eb3d A |
331 | { |
332 | PAGE *h; | |
333 | EPG *ep, save; | |
334 | pgno_t pg; | |
e9ce8d39 A |
335 | |
336 | /* | |
9385eb3d A |
337 | * Find any matching record; __bt_search pins the page. |
338 | * | |
339 | * If it's an exact match and duplicates are possible, walk backwards | |
340 | * in the tree until we find the first one. Otherwise, make sure it's | |
341 | * a valid key (__bt_search may return an index just past the end of a | |
342 | * page) and return it. | |
e9ce8d39 | 343 | */ |
9385eb3d A |
344 | if ((ep = __bt_search(t, key, exactp)) == NULL) |
345 | return (0); | |
346 | if (*exactp) { | |
347 | if (F_ISSET(t, B_NODUPS)) { | |
348 | *erval = *ep; | |
349 | return (RET_SUCCESS); | |
350 | } | |
1f2f436a | 351 | |
9385eb3d A |
352 | /* |
353 | * Walk backwards, as long as the entry matches and there are | |
354 | * keys left in the tree. Save a copy of each match in case | |
355 | * we go too far. | |
356 | */ | |
357 | save = *ep; | |
358 | h = ep->page; | |
359 | do { | |
360 | if (save.page->pgno != ep->page->pgno) { | |
361 | mpool_put(t->bt_mp, save.page, 0); | |
362 | save = *ep; | |
363 | } else | |
364 | save.index = ep->index; | |
365 | ||
366 | /* | |
367 | * Don't unpin the page the last (or original) match | |
368 | * was on, but make sure it's unpinned if an error | |
369 | * occurs. | |
370 | */ | |
371 | if (ep->index == 0) { | |
372 | if (h->prevpg == P_INVALID) | |
373 | break; | |
374 | if (h->pgno != save.page->pgno) | |
375 | mpool_put(t->bt_mp, h, 0); | |
376 | if ((h = mpool_get(t->bt_mp, | |
377 | h->prevpg, 0)) == NULL) { | |
378 | if (h->pgno == save.page->pgno) | |
379 | mpool_put(t->bt_mp, | |
380 | save.page, 0); | |
381 | return (RET_ERROR); | |
382 | } | |
383 | ep->page = h; | |
384 | ep->index = NEXTINDEX(h); | |
385 | } | |
386 | --ep->index; | |
387 | } while (__bt_cmp(t, key, ep) == 0); | |
388 | ||
389 | /* | |
390 | * Reach here with the last page that was looked at pinned, | |
391 | * which may or may not be the same as the last (or original) | |
392 | * match page. If it's not useful, release it. | |
393 | */ | |
394 | if (h->pgno != save.page->pgno) | |
395 | mpool_put(t->bt_mp, h, 0); | |
396 | ||
397 | *erval = save; | |
398 | return (RET_SUCCESS); | |
399 | } | |
400 | ||
401 | /* If at the end of a page, find the next entry. */ | |
402 | if (ep->index == NEXTINDEX(ep->page)) { | |
403 | h = ep->page; | |
404 | pg = h->nextpg; | |
405 | mpool_put(t->bt_mp, h, 0); | |
406 | if (pg == P_INVALID) | |
407 | return (RET_SPECIAL); | |
408 | if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) | |
e9ce8d39 | 409 | return (RET_ERROR); |
9385eb3d A |
410 | ep->index = 0; |
411 | ep->page = h; | |
e9ce8d39 | 412 | } |
9385eb3d | 413 | *erval = *ep; |
e9ce8d39 A |
414 | return (RET_SUCCESS); |
415 | } | |
416 | ||
417 | /* | |
9385eb3d A |
418 | * __bt_setcur -- |
419 | * Set the cursor to an entry in the tree. | |
e9ce8d39 A |
420 | * |
421 | * Parameters: | |
9385eb3d A |
422 | * t: the tree |
423 | * pgno: page number | |
1f2f436a | 424 | * idx: page index |
e9ce8d39 | 425 | */ |
9385eb3d | 426 | void |
1f2f436a | 427 | __bt_setcur(BTREE *t, pgno_t pgno, u_int idx) |
e9ce8d39 | 428 | { |
9385eb3d A |
429 | /* Lose any already deleted key. */ |
430 | if (t->bt_cursor.key.data != NULL) { | |
431 | free(t->bt_cursor.key.data); | |
432 | t->bt_cursor.key.size = 0; | |
433 | t->bt_cursor.key.data = NULL; | |
434 | } | |
435 | F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE); | |
e9ce8d39 | 436 | |
9385eb3d A |
437 | /* Update the cursor. */ |
438 | t->bt_cursor.pg.pgno = pgno; | |
1f2f436a | 439 | t->bt_cursor.pg.index = idx; |
9385eb3d | 440 | F_SET(&t->bt_cursor, CURS_INIT); |
e9ce8d39 | 441 | } |