Commit | Line | Data |
463ee0b2 |
1 | /* |
2 | * sdbm - ndbm work-alike hashed database library |
3 | * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978). |
4 | * author: oz@nexus.yorku.ca |
5 | * status: public domain. |
6 | * |
7 | * core routines |
8 | */ |
9 | |
10 | #ifndef lint |
11 | static char rcsid[] = "$Id: sdbm.c,v 1.16 90/12/13 13:01:31 oz Exp $"; |
12 | #endif |
13 | |
85e6fe83 |
14 | #include "config.h" |
463ee0b2 |
15 | #include "sdbm.h" |
16 | #include "tune.h" |
17 | #include "pair.h" |
18 | |
85e6fe83 |
19 | #ifdef I_FCNTL |
20 | # include <fcntl.h> |
463ee0b2 |
21 | #endif |
85e6fe83 |
22 | #ifdef I_SYS_FILE |
23 | # include <sys/file.h> |
463ee0b2 |
24 | #endif |
25 | |
85e6fe83 |
26 | #ifdef I_STRING |
27 | # include <string.h> |
28 | #else |
29 | # include <strings.h> |
463ee0b2 |
30 | #endif |
31 | |
32 | /* |
33 | * externals |
34 | */ |
137443ea |
35 | #ifndef WIN32 |
463ee0b2 |
36 | #ifndef sun |
37 | extern int errno; |
38 | #endif |
39 | |
85e6fe83 |
40 | extern Malloc_t malloc proto((MEM_SIZE)); |
851efeba |
41 | extern Free_t free proto((Malloc_t)); |
85e6fe83 |
42 | extern Off_t lseek(); |
137443ea |
43 | #endif |
463ee0b2 |
44 | |
45 | /* |
46 | * forward |
47 | */ |
48 | static int getdbit proto((DBM *, long)); |
49 | static int setdbit proto((DBM *, long)); |
50 | static int getpage proto((DBM *, long)); |
51 | static datum getnext proto((DBM *)); |
52 | static int makroom proto((DBM *, long, int)); |
53 | |
54 | /* |
55 | * useful macros |
56 | */ |
57 | #define bad(x) ((x).dptr == NULL || (x).dsize < 0) |
58 | #define exhash(item) sdbm_hash((item).dptr, (item).dsize) |
59 | #define ioerr(db) ((db)->flags |= DBM_IOERR) |
60 | |
61 | #define OFF_PAG(off) (long) (off) * PBLKSIZ |
62 | #define OFF_DIR(off) (long) (off) * DBLKSIZ |
63 | |
64 | static long masks[] = { |
65 | 000000000000, 000000000001, 000000000003, 000000000007, |
66 | 000000000017, 000000000037, 000000000077, 000000000177, |
67 | 000000000377, 000000000777, 000000001777, 000000003777, |
68 | 000000007777, 000000017777, 000000037777, 000000077777, |
69 | 000000177777, 000000377777, 000000777777, 000001777777, |
70 | 000003777777, 000007777777, 000017777777, 000037777777, |
71 | 000077777777, 000177777777, 000377777777, 000777777777, |
72 | 001777777777, 003777777777, 007777777777, 017777777777 |
73 | }; |
74 | |
75 | datum nullitem = {NULL, 0}; |
76 | |
77 | DBM * |
78 | sdbm_open(file, flags, mode) |
79 | register char *file; |
80 | register int flags; |
81 | register int mode; |
82 | { |
83 | register DBM *db; |
84 | register char *dirname; |
85 | register char *pagname; |
86 | register int n; |
87 | |
88 | if (file == NULL || !*file) |
89 | return errno = EINVAL, (DBM *) NULL; |
90 | /* |
91 | * need space for two seperate filenames |
92 | */ |
93 | n = strlen(file) * 2 + strlen(DIRFEXT) + strlen(PAGFEXT) + 2; |
94 | |
95 | if ((dirname = malloc((unsigned) n)) == NULL) |
96 | return errno = ENOMEM, (DBM *) NULL; |
97 | /* |
98 | * build the file names |
99 | */ |
100 | dirname = strcat(strcpy(dirname, file), DIRFEXT); |
101 | pagname = strcpy(dirname + strlen(dirname) + 1, file); |
102 | pagname = strcat(pagname, PAGFEXT); |
103 | |
104 | db = sdbm_prep(dirname, pagname, flags, mode); |
105 | free((char *) dirname); |
106 | return db; |
107 | } |
108 | |
109 | DBM * |
110 | sdbm_prep(dirname, pagname, flags, mode) |
111 | char *dirname; |
112 | char *pagname; |
113 | int flags; |
114 | int mode; |
115 | { |
116 | register DBM *db; |
117 | struct stat dstat; |
118 | |
119 | if ((db = (DBM *) malloc(sizeof(DBM))) == NULL) |
120 | return errno = ENOMEM, (DBM *) NULL; |
121 | |
122 | db->flags = 0; |
123 | db->hmask = 0; |
124 | db->blkptr = 0; |
125 | db->keyptr = 0; |
126 | /* |
127 | * adjust user flags so that WRONLY becomes RDWR, |
128 | * as required by this package. Also set our internal |
129 | * flag for RDONLY if needed. |
130 | */ |
131 | if (flags & O_WRONLY) |
132 | flags = (flags & ~O_WRONLY) | O_RDWR; |
133 | |
134 | else if ((flags & 03) == O_RDONLY) |
135 | db->flags = DBM_RDONLY; |
136 | /* |
137 | * open the files in sequence, and stat the dirfile. |
138 | * If we fail anywhere, undo everything, return NULL. |
139 | */ |
137443ea |
140 | #if defined(OS2) || defined(MSDOS) || defined(WIN32) |
4633a7c4 |
141 | flags |= O_BINARY; |
142 | # endif |
463ee0b2 |
143 | if ((db->pagf = open(pagname, flags, mode)) > -1) { |
144 | if ((db->dirf = open(dirname, flags, mode)) > -1) { |
145 | /* |
146 | * need the dirfile size to establish max bit number. |
147 | */ |
148 | if (fstat(db->dirf, &dstat) == 0) { |
149 | /* |
150 | * zero size: either a fresh database, or one with a single, |
151 | * unsplit data page: dirpage is all zeros. |
152 | */ |
153 | db->dirbno = (!dstat.st_size) ? 0 : -1; |
154 | db->pagbno = -1; |
155 | db->maxbno = dstat.st_size * BYTESIZ; |
156 | |
157 | (void) memset(db->pagbuf, 0, PBLKSIZ); |
158 | (void) memset(db->dirbuf, 0, DBLKSIZ); |
159 | /* |
160 | * success |
161 | */ |
162 | return db; |
163 | } |
164 | (void) close(db->dirf); |
165 | } |
166 | (void) close(db->pagf); |
167 | } |
168 | free((char *) db); |
169 | return (DBM *) NULL; |
170 | } |
171 | |
172 | void |
173 | sdbm_close(db) |
174 | register DBM *db; |
175 | { |
176 | if (db == NULL) |
177 | errno = EINVAL; |
178 | else { |
179 | (void) close(db->dirf); |
180 | (void) close(db->pagf); |
181 | free((char *) db); |
182 | } |
183 | } |
184 | |
185 | datum |
186 | sdbm_fetch(db, key) |
187 | register DBM *db; |
188 | datum key; |
189 | { |
190 | if (db == NULL || bad(key)) |
191 | return errno = EINVAL, nullitem; |
192 | |
193 | if (getpage(db, exhash(key))) |
194 | return getpair(db->pagbuf, key); |
195 | |
196 | return ioerr(db), nullitem; |
197 | } |
198 | |
199 | int |
200 | sdbm_delete(db, key) |
201 | register DBM *db; |
202 | datum key; |
203 | { |
204 | if (db == NULL || bad(key)) |
205 | return errno = EINVAL, -1; |
206 | if (sdbm_rdonly(db)) |
207 | return errno = EPERM, -1; |
208 | |
209 | if (getpage(db, exhash(key))) { |
210 | if (!delpair(db->pagbuf, key)) |
211 | return -1; |
212 | /* |
213 | * update the page file |
214 | */ |
215 | if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 |
216 | || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) |
217 | return ioerr(db), -1; |
218 | |
219 | return 0; |
220 | } |
221 | |
222 | return ioerr(db), -1; |
223 | } |
224 | |
225 | int |
226 | sdbm_store(db, key, val, flags) |
227 | register DBM *db; |
228 | datum key; |
229 | datum val; |
230 | int flags; |
231 | { |
232 | int need; |
233 | register long hash; |
234 | |
235 | if (db == NULL || bad(key)) |
236 | return errno = EINVAL, -1; |
237 | if (sdbm_rdonly(db)) |
238 | return errno = EPERM, -1; |
239 | |
240 | need = key.dsize + val.dsize; |
241 | /* |
242 | * is the pair too big (or too small) for this database ?? |
243 | */ |
244 | if (need < 0 || need > PAIRMAX) |
245 | return errno = EINVAL, -1; |
246 | |
247 | if (getpage(db, (hash = exhash(key)))) { |
248 | /* |
249 | * if we need to replace, delete the key/data pair |
250 | * first. If it is not there, ignore. |
251 | */ |
252 | if (flags == DBM_REPLACE) |
253 | (void) delpair(db->pagbuf, key); |
254 | #ifdef SEEDUPS |
255 | else if (duppair(db->pagbuf, key)) |
256 | return 1; |
257 | #endif |
258 | /* |
259 | * if we do not have enough room, we have to split. |
260 | */ |
261 | if (!fitpair(db->pagbuf, need)) |
262 | if (!makroom(db, hash, need)) |
263 | return ioerr(db), -1; |
264 | /* |
265 | * we have enough room or split is successful. insert the key, |
266 | * and update the page file. |
267 | */ |
268 | (void) putpair(db->pagbuf, key, val); |
269 | |
270 | if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 |
271 | || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) |
272 | return ioerr(db), -1; |
273 | /* |
274 | * success |
275 | */ |
276 | return 0; |
277 | } |
278 | |
279 | return ioerr(db), -1; |
280 | } |
281 | |
282 | /* |
283 | * makroom - make room by splitting the overfull page |
284 | * this routine will attempt to make room for SPLTMAX times before |
285 | * giving up. |
286 | */ |
287 | static int |
288 | makroom(db, hash, need) |
289 | register DBM *db; |
290 | long hash; |
291 | int need; |
292 | { |
293 | long newp; |
294 | char twin[PBLKSIZ]; |
295 | char *pag = db->pagbuf; |
296 | char *new = twin; |
297 | register int smax = SPLTMAX; |
298 | |
299 | do { |
300 | /* |
301 | * split the current page |
302 | */ |
303 | (void) splpage(pag, new, db->hmask + 1); |
304 | /* |
305 | * address of the new page |
306 | */ |
307 | newp = (hash & db->hmask) | (db->hmask + 1); |
308 | |
309 | /* |
310 | * write delay, read avoidence/cache shuffle: |
311 | * select the page for incoming pair: if key is to go to the new page, |
312 | * write out the previous one, and copy the new one over, thus making |
313 | * it the current page. If not, simply write the new page, and we are |
314 | * still looking at the page of interest. current page is not updated |
315 | * here, as sdbm_store will do so, after it inserts the incoming pair. |
316 | */ |
317 | if (hash & (db->hmask + 1)) { |
318 | if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 |
319 | || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) |
320 | return 0; |
321 | db->pagbno = newp; |
322 | (void) memcpy(pag, new, PBLKSIZ); |
323 | } |
324 | else if (lseek(db->pagf, OFF_PAG(newp), SEEK_SET) < 0 |
325 | || write(db->pagf, new, PBLKSIZ) < 0) |
326 | return 0; |
327 | |
328 | if (!setdbit(db, db->curbit)) |
329 | return 0; |
330 | /* |
331 | * see if we have enough room now |
332 | */ |
333 | if (fitpair(pag, need)) |
334 | return 1; |
335 | /* |
336 | * try again... update curbit and hmask as getpage would have |
337 | * done. because of our update of the current page, we do not |
338 | * need to read in anything. BUT we have to write the current |
339 | * [deferred] page out, as the window of failure is too great. |
340 | */ |
341 | db->curbit = 2 * db->curbit + |
342 | ((hash & (db->hmask + 1)) ? 2 : 1); |
343 | db->hmask |= db->hmask + 1; |
344 | |
345 | if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 |
346 | || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) |
347 | return 0; |
348 | |
349 | } while (--smax); |
350 | /* |
351 | * if we are here, this is real bad news. After SPLTMAX splits, |
352 | * we still cannot fit the key. say goodnight. |
353 | */ |
354 | #ifdef BADMESS |
355 | (void) write(2, "sdbm: cannot insert after SPLTMAX attempts.\n", 44); |
356 | #endif |
357 | return 0; |
358 | |
359 | } |
360 | |
361 | /* |
362 | * the following two routines will break if |
363 | * deletions aren't taken into account. (ndbm bug) |
364 | */ |
365 | datum |
366 | sdbm_firstkey(db) |
367 | register DBM *db; |
368 | { |
369 | if (db == NULL) |
370 | return errno = EINVAL, nullitem; |
371 | /* |
372 | * start at page 0 |
373 | */ |
374 | if (lseek(db->pagf, OFF_PAG(0), SEEK_SET) < 0 |
375 | || read(db->pagf, db->pagbuf, PBLKSIZ) < 0) |
376 | return ioerr(db), nullitem; |
377 | db->pagbno = 0; |
378 | db->blkptr = 0; |
379 | db->keyptr = 0; |
380 | |
381 | return getnext(db); |
382 | } |
383 | |
384 | datum |
385 | sdbm_nextkey(db) |
386 | register DBM *db; |
387 | { |
388 | if (db == NULL) |
389 | return errno = EINVAL, nullitem; |
390 | return getnext(db); |
391 | } |
392 | |
393 | /* |
394 | * all important binary trie traversal |
395 | */ |
396 | static int |
397 | getpage(db, hash) |
398 | register DBM *db; |
399 | register long hash; |
400 | { |
401 | register int hbit; |
402 | register long dbit; |
403 | register long pagb; |
404 | |
405 | dbit = 0; |
406 | hbit = 0; |
407 | while (dbit < db->maxbno && getdbit(db, dbit)) |
408 | dbit = 2 * dbit + ((hash & (1 << hbit++)) ? 2 : 1); |
409 | |
410 | debug(("dbit: %d...", dbit)); |
411 | |
412 | db->curbit = dbit; |
413 | db->hmask = masks[hbit]; |
414 | |
415 | pagb = hash & db->hmask; |
416 | /* |
417 | * see if the block we need is already in memory. |
418 | * note: this lookaside cache has about 10% hit rate. |
419 | */ |
420 | if (pagb != db->pagbno) { |
421 | /* |
422 | * note: here, we assume a "hole" is read as 0s. |
423 | * if not, must zero pagbuf first. |
424 | */ |
425 | if (lseek(db->pagf, OFF_PAG(pagb), SEEK_SET) < 0 |
426 | || read(db->pagf, db->pagbuf, PBLKSIZ) < 0) |
427 | return 0; |
428 | if (!chkpage(db->pagbuf)) |
429 | return 0; |
430 | db->pagbno = pagb; |
431 | |
432 | debug(("pag read: %d\n", pagb)); |
433 | } |
434 | return 1; |
435 | } |
436 | |
437 | static int |
438 | getdbit(db, dbit) |
439 | register DBM *db; |
440 | register long dbit; |
441 | { |
442 | register long c; |
443 | register long dirb; |
444 | |
445 | c = dbit / BYTESIZ; |
446 | dirb = c / DBLKSIZ; |
447 | |
448 | if (dirb != db->dirbno) { |
449 | if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0 |
450 | || read(db->dirf, db->dirbuf, DBLKSIZ) < 0) |
451 | return 0; |
452 | db->dirbno = dirb; |
453 | |
454 | debug(("dir read: %d\n", dirb)); |
455 | } |
456 | |
457 | return db->dirbuf[c % DBLKSIZ] & (1 << dbit % BYTESIZ); |
458 | } |
459 | |
460 | static int |
461 | setdbit(db, dbit) |
462 | register DBM *db; |
463 | register long dbit; |
464 | { |
465 | register long c; |
466 | register long dirb; |
467 | |
468 | c = dbit / BYTESIZ; |
469 | dirb = c / DBLKSIZ; |
470 | |
471 | if (dirb != db->dirbno) { |
472 | if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0 |
473 | || read(db->dirf, db->dirbuf, DBLKSIZ) < 0) |
474 | return 0; |
475 | db->dirbno = dirb; |
476 | |
477 | debug(("dir read: %d\n", dirb)); |
478 | } |
479 | |
480 | db->dirbuf[c % DBLKSIZ] |= (1 << dbit % BYTESIZ); |
481 | |
482 | if (dbit >= db->maxbno) |
483 | db->maxbno += DBLKSIZ * BYTESIZ; |
484 | |
485 | if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0 |
486 | || write(db->dirf, db->dirbuf, DBLKSIZ) < 0) |
487 | return 0; |
488 | |
489 | return 1; |
490 | } |
491 | |
492 | /* |
493 | * getnext - get the next key in the page, and if done with |
494 | * the page, try the next page in sequence |
495 | */ |
496 | static datum |
497 | getnext(db) |
498 | register DBM *db; |
499 | { |
500 | datum key; |
501 | |
502 | for (;;) { |
503 | db->keyptr++; |
504 | key = getnkey(db->pagbuf, db->keyptr); |
505 | if (key.dptr != NULL) |
506 | return key; |
507 | /* |
508 | * we either run out, or there is nothing on this page.. |
509 | * try the next one... If we lost our position on the |
510 | * file, we will have to seek. |
511 | */ |
512 | db->keyptr = 0; |
513 | if (db->pagbno != db->blkptr++) |
514 | if (lseek(db->pagf, OFF_PAG(db->blkptr), SEEK_SET) < 0) |
515 | break; |
516 | db->pagbno = db->blkptr; |
517 | if (read(db->pagf, db->pagbuf, PBLKSIZ) <= 0) |
518 | break; |
519 | if (!chkpage(db->pagbuf)) |
520 | break; |
521 | } |
522 | |
523 | return ioerr(db), nullitem; |
524 | } |
85e6fe83 |
525 | |