Commit | Line | Data |
463ee0b2 |
1 | /* |
2 | * sdbm - ndbm work-alike hashed database library |
3 | * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978). |
4 | * author: oz@nexus.yorku.ca |
5 | * status: public domain. |
6 | * |
7 | * core routines |
8 | */ |
9 | |
17f28c40 |
10 | #include "INTERN.h" |
85e6fe83 |
11 | #include "config.h" |
4f63d024 |
12 | #ifdef WIN32 |
13 | #include "io.h" |
14 | #endif |
463ee0b2 |
15 | #include "sdbm.h" |
16 | #include "tune.h" |
17 | #include "pair.h" |
18 | |
85e6fe83 |
19 | #ifdef I_FCNTL |
20 | # include <fcntl.h> |
463ee0b2 |
21 | #endif |
85e6fe83 |
22 | #ifdef I_SYS_FILE |
23 | # include <sys/file.h> |
463ee0b2 |
24 | #endif |
25 | |
85e6fe83 |
26 | #ifdef I_STRING |
5518ecd4 |
27 | # ifndef __ultrix__ |
28 | # include <string.h> |
29 | # endif |
85e6fe83 |
30 | #else |
31 | # include <strings.h> |
463ee0b2 |
32 | #endif |
33 | |
34 | /* |
35 | * externals |
36 | */ |
81770b0c |
37 | |
38 | #include <errno.h> /* See notes in perl.h about avoiding |
39 | extern int errno; */ |
463ee0b2 |
40 | |
85e6fe83 |
41 | extern Malloc_t malloc proto((MEM_SIZE)); |
851efeba |
42 | extern Free_t free proto((Malloc_t)); |
bf0c440f |
43 | |
463ee0b2 |
44 | /* |
45 | * forward |
46 | */ |
47 | static int getdbit proto((DBM *, long)); |
48 | static int setdbit proto((DBM *, long)); |
49 | static int getpage proto((DBM *, long)); |
50 | static datum getnext proto((DBM *)); |
51 | static int makroom proto((DBM *, long, int)); |
52 | |
53 | /* |
54 | * useful macros |
55 | */ |
56 | #define bad(x) ((x).dptr == NULL || (x).dsize < 0) |
57 | #define exhash(item) sdbm_hash((item).dptr, (item).dsize) |
58 | #define ioerr(db) ((db)->flags |= DBM_IOERR) |
59 | |
60 | #define OFF_PAG(off) (long) (off) * PBLKSIZ |
61 | #define OFF_DIR(off) (long) (off) * DBLKSIZ |
62 | |
27da23d5 |
63 | static const long masks[] = { |
463ee0b2 |
64 | 000000000000, 000000000001, 000000000003, 000000000007, |
65 | 000000000017, 000000000037, 000000000077, 000000000177, |
66 | 000000000377, 000000000777, 000000001777, 000000003777, |
67 | 000000007777, 000000017777, 000000037777, 000000077777, |
68 | 000000177777, 000000377777, 000000777777, 000001777777, |
69 | 000003777777, 000007777777, 000017777777, 000037777777, |
70 | 000077777777, 000177777777, 000377777777, 000777777777, |
71 | 001777777777, 003777777777, 007777777777, 017777777777 |
72 | }; |
73 | |
463ee0b2 |
74 | DBM * |
f0f333f4 |
75 | sdbm_open(register char *file, register int flags, register int mode) |
463ee0b2 |
76 | { |
77 | register DBM *db; |
78 | register char *dirname; |
79 | register char *pagname; |
80 | register int n; |
81 | |
82 | if (file == NULL || !*file) |
83 | return errno = EINVAL, (DBM *) NULL; |
84 | /* |
85 | * need space for two seperate filenames |
86 | */ |
87 | n = strlen(file) * 2 + strlen(DIRFEXT) + strlen(PAGFEXT) + 2; |
88 | |
f0f333f4 |
89 | if ((dirname = (char *) malloc((unsigned) n)) == NULL) |
463ee0b2 |
90 | return errno = ENOMEM, (DBM *) NULL; |
91 | /* |
92 | * build the file names |
93 | */ |
94 | dirname = strcat(strcpy(dirname, file), DIRFEXT); |
95 | pagname = strcpy(dirname + strlen(dirname) + 1, file); |
96 | pagname = strcat(pagname, PAGFEXT); |
97 | |
98 | db = sdbm_prep(dirname, pagname, flags, mode); |
99 | free((char *) dirname); |
100 | return db; |
101 | } |
102 | |
103 | DBM * |
f0f333f4 |
104 | sdbm_prep(char *dirname, char *pagname, int flags, int mode) |
463ee0b2 |
105 | { |
106 | register DBM *db; |
107 | struct stat dstat; |
108 | |
109 | if ((db = (DBM *) malloc(sizeof(DBM))) == NULL) |
110 | return errno = ENOMEM, (DBM *) NULL; |
111 | |
112 | db->flags = 0; |
113 | db->hmask = 0; |
114 | db->blkptr = 0; |
115 | db->keyptr = 0; |
116 | /* |
117 | * adjust user flags so that WRONLY becomes RDWR, |
118 | * as required by this package. Also set our internal |
119 | * flag for RDONLY if needed. |
120 | */ |
121 | if (flags & O_WRONLY) |
122 | flags = (flags & ~O_WRONLY) | O_RDWR; |
123 | |
124 | else if ((flags & 03) == O_RDONLY) |
125 | db->flags = DBM_RDONLY; |
126 | /* |
127 | * open the files in sequence, and stat the dirfile. |
128 | * If we fail anywhere, undo everything, return NULL. |
129 | */ |
1761cee5 |
130 | #if defined(OS2) || defined(MSDOS) || defined(WIN32) || defined(__CYGWIN__) |
4633a7c4 |
131 | flags |= O_BINARY; |
132 | # endif |
463ee0b2 |
133 | if ((db->pagf = open(pagname, flags, mode)) > -1) { |
134 | if ((db->dirf = open(dirname, flags, mode)) > -1) { |
135 | /* |
136 | * need the dirfile size to establish max bit number. |
137 | */ |
138 | if (fstat(db->dirf, &dstat) == 0) { |
139 | /* |
140 | * zero size: either a fresh database, or one with a single, |
141 | * unsplit data page: dirpage is all zeros. |
142 | */ |
143 | db->dirbno = (!dstat.st_size) ? 0 : -1; |
144 | db->pagbno = -1; |
145 | db->maxbno = dstat.st_size * BYTESIZ; |
146 | |
147 | (void) memset(db->pagbuf, 0, PBLKSIZ); |
148 | (void) memset(db->dirbuf, 0, DBLKSIZ); |
149 | /* |
150 | * success |
151 | */ |
152 | return db; |
153 | } |
154 | (void) close(db->dirf); |
155 | } |
156 | (void) close(db->pagf); |
157 | } |
158 | free((char *) db); |
159 | return (DBM *) NULL; |
160 | } |
161 | |
162 | void |
f0f333f4 |
163 | sdbm_close(register DBM *db) |
463ee0b2 |
164 | { |
165 | if (db == NULL) |
166 | errno = EINVAL; |
167 | else { |
168 | (void) close(db->dirf); |
169 | (void) close(db->pagf); |
170 | free((char *) db); |
171 | } |
172 | } |
173 | |
174 | datum |
f0f333f4 |
175 | sdbm_fetch(register DBM *db, datum key) |
463ee0b2 |
176 | { |
177 | if (db == NULL || bad(key)) |
178 | return errno = EINVAL, nullitem; |
179 | |
180 | if (getpage(db, exhash(key))) |
181 | return getpair(db->pagbuf, key); |
182 | |
183 | return ioerr(db), nullitem; |
184 | } |
185 | |
186 | int |
f4b9d880 |
187 | sdbm_exists(register DBM *db, datum key) |
188 | { |
189 | if (db == NULL || bad(key)) |
190 | return errno = EINVAL, -1; |
191 | |
192 | if (getpage(db, exhash(key))) |
193 | return exipair(db->pagbuf, key); |
194 | |
195 | return ioerr(db), -1; |
196 | } |
197 | |
198 | int |
f0f333f4 |
199 | sdbm_delete(register DBM *db, datum key) |
463ee0b2 |
200 | { |
201 | if (db == NULL || bad(key)) |
202 | return errno = EINVAL, -1; |
203 | if (sdbm_rdonly(db)) |
204 | return errno = EPERM, -1; |
205 | |
206 | if (getpage(db, exhash(key))) { |
207 | if (!delpair(db->pagbuf, key)) |
208 | return -1; |
209 | /* |
210 | * update the page file |
211 | */ |
212 | if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 |
213 | || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) |
214 | return ioerr(db), -1; |
215 | |
216 | return 0; |
217 | } |
218 | |
219 | return ioerr(db), -1; |
220 | } |
221 | |
222 | int |
f0f333f4 |
223 | sdbm_store(register DBM *db, datum key, datum val, int flags) |
463ee0b2 |
224 | { |
225 | int need; |
226 | register long hash; |
227 | |
228 | if (db == NULL || bad(key)) |
229 | return errno = EINVAL, -1; |
230 | if (sdbm_rdonly(db)) |
231 | return errno = EPERM, -1; |
232 | |
233 | need = key.dsize + val.dsize; |
234 | /* |
235 | * is the pair too big (or too small) for this database ?? |
236 | */ |
237 | if (need < 0 || need > PAIRMAX) |
238 | return errno = EINVAL, -1; |
239 | |
240 | if (getpage(db, (hash = exhash(key)))) { |
241 | /* |
242 | * if we need to replace, delete the key/data pair |
243 | * first. If it is not there, ignore. |
244 | */ |
245 | if (flags == DBM_REPLACE) |
246 | (void) delpair(db->pagbuf, key); |
247 | #ifdef SEEDUPS |
248 | else if (duppair(db->pagbuf, key)) |
249 | return 1; |
250 | #endif |
251 | /* |
252 | * if we do not have enough room, we have to split. |
253 | */ |
254 | if (!fitpair(db->pagbuf, need)) |
255 | if (!makroom(db, hash, need)) |
256 | return ioerr(db), -1; |
257 | /* |
258 | * we have enough room or split is successful. insert the key, |
259 | * and update the page file. |
260 | */ |
261 | (void) putpair(db->pagbuf, key, val); |
262 | |
263 | if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 |
264 | || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) |
265 | return ioerr(db), -1; |
266 | /* |
267 | * success |
268 | */ |
269 | return 0; |
270 | } |
271 | |
272 | return ioerr(db), -1; |
273 | } |
274 | |
275 | /* |
276 | * makroom - make room by splitting the overfull page |
277 | * this routine will attempt to make room for SPLTMAX times before |
278 | * giving up. |
279 | */ |
280 | static int |
f0f333f4 |
281 | makroom(register DBM *db, long int hash, int need) |
463ee0b2 |
282 | { |
283 | long newp; |
284 | char twin[PBLKSIZ]; |
f6bbbfc7 |
285 | #if defined(DOSISH) || defined(WIN32) |
286 | char zer[PBLKSIZ]; |
287 | long oldtail; |
288 | #endif |
463ee0b2 |
289 | char *pag = db->pagbuf; |
f0f333f4 |
290 | char *New = twin; |
463ee0b2 |
291 | register int smax = SPLTMAX; |
292 | |
293 | do { |
294 | /* |
295 | * split the current page |
296 | */ |
f0f333f4 |
297 | (void) splpage(pag, New, db->hmask + 1); |
463ee0b2 |
298 | /* |
299 | * address of the new page |
300 | */ |
301 | newp = (hash & db->hmask) | (db->hmask + 1); |
302 | |
303 | /* |
304 | * write delay, read avoidence/cache shuffle: |
305 | * select the page for incoming pair: if key is to go to the new page, |
306 | * write out the previous one, and copy the new one over, thus making |
307 | * it the current page. If not, simply write the new page, and we are |
308 | * still looking at the page of interest. current page is not updated |
309 | * here, as sdbm_store will do so, after it inserts the incoming pair. |
310 | */ |
f6bbbfc7 |
311 | |
312 | #if defined(DOSISH) || defined(WIN32) |
313 | /* |
314 | * Fill hole with 0 if made it. |
315 | * (hole is NOT read as 0) |
316 | */ |
317 | oldtail = lseek(db->pagf, 0L, SEEK_END); |
318 | memset(zer, 0, PBLKSIZ); |
319 | while (OFF_PAG(newp) > oldtail) { |
320 | if (lseek(db->pagf, 0L, SEEK_END) < 0 || |
321 | write(db->pagf, zer, PBLKSIZ) < 0) { |
322 | |
323 | return 0; |
324 | } |
325 | oldtail += PBLKSIZ; |
326 | } |
327 | #endif |
463ee0b2 |
328 | if (hash & (db->hmask + 1)) { |
329 | if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 |
330 | || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) |
331 | return 0; |
332 | db->pagbno = newp; |
f0f333f4 |
333 | (void) memcpy(pag, New, PBLKSIZ); |
463ee0b2 |
334 | } |
335 | else if (lseek(db->pagf, OFF_PAG(newp), SEEK_SET) < 0 |
f0f333f4 |
336 | || write(db->pagf, New, PBLKSIZ) < 0) |
463ee0b2 |
337 | return 0; |
338 | |
339 | if (!setdbit(db, db->curbit)) |
340 | return 0; |
341 | /* |
342 | * see if we have enough room now |
343 | */ |
344 | if (fitpair(pag, need)) |
345 | return 1; |
346 | /* |
347 | * try again... update curbit and hmask as getpage would have |
348 | * done. because of our update of the current page, we do not |
349 | * need to read in anything. BUT we have to write the current |
350 | * [deferred] page out, as the window of failure is too great. |
351 | */ |
352 | db->curbit = 2 * db->curbit + |
353 | ((hash & (db->hmask + 1)) ? 2 : 1); |
354 | db->hmask |= db->hmask + 1; |
355 | |
356 | if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 |
357 | || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) |
358 | return 0; |
359 | |
360 | } while (--smax); |
361 | /* |
362 | * if we are here, this is real bad news. After SPLTMAX splits, |
363 | * we still cannot fit the key. say goodnight. |
364 | */ |
365 | #ifdef BADMESS |
366 | (void) write(2, "sdbm: cannot insert after SPLTMAX attempts.\n", 44); |
367 | #endif |
368 | return 0; |
369 | |
370 | } |
371 | |
372 | /* |
373 | * the following two routines will break if |
374 | * deletions aren't taken into account. (ndbm bug) |
375 | */ |
376 | datum |
f0f333f4 |
377 | sdbm_firstkey(register DBM *db) |
463ee0b2 |
378 | { |
379 | if (db == NULL) |
380 | return errno = EINVAL, nullitem; |
381 | /* |
382 | * start at page 0 |
383 | */ |
384 | if (lseek(db->pagf, OFF_PAG(0), SEEK_SET) < 0 |
385 | || read(db->pagf, db->pagbuf, PBLKSIZ) < 0) |
386 | return ioerr(db), nullitem; |
387 | db->pagbno = 0; |
388 | db->blkptr = 0; |
389 | db->keyptr = 0; |
390 | |
391 | return getnext(db); |
392 | } |
393 | |
394 | datum |
f0f333f4 |
395 | sdbm_nextkey(register DBM *db) |
463ee0b2 |
396 | { |
397 | if (db == NULL) |
398 | return errno = EINVAL, nullitem; |
399 | return getnext(db); |
400 | } |
401 | |
402 | /* |
403 | * all important binary trie traversal |
404 | */ |
405 | static int |
f0f333f4 |
406 | getpage(register DBM *db, register long int hash) |
463ee0b2 |
407 | { |
408 | register int hbit; |
409 | register long dbit; |
410 | register long pagb; |
411 | |
412 | dbit = 0; |
413 | hbit = 0; |
414 | while (dbit < db->maxbno && getdbit(db, dbit)) |
415 | dbit = 2 * dbit + ((hash & (1 << hbit++)) ? 2 : 1); |
416 | |
417 | debug(("dbit: %d...", dbit)); |
418 | |
419 | db->curbit = dbit; |
420 | db->hmask = masks[hbit]; |
421 | |
422 | pagb = hash & db->hmask; |
423 | /* |
424 | * see if the block we need is already in memory. |
425 | * note: this lookaside cache has about 10% hit rate. |
426 | */ |
427 | if (pagb != db->pagbno) { |
428 | /* |
429 | * note: here, we assume a "hole" is read as 0s. |
430 | * if not, must zero pagbuf first. |
431 | */ |
432 | if (lseek(db->pagf, OFF_PAG(pagb), SEEK_SET) < 0 |
433 | || read(db->pagf, db->pagbuf, PBLKSIZ) < 0) |
434 | return 0; |
435 | if (!chkpage(db->pagbuf)) |
436 | return 0; |
437 | db->pagbno = pagb; |
438 | |
439 | debug(("pag read: %d\n", pagb)); |
440 | } |
441 | return 1; |
442 | } |
443 | |
444 | static int |
f0f333f4 |
445 | getdbit(register DBM *db, register long int dbit) |
463ee0b2 |
446 | { |
447 | register long c; |
448 | register long dirb; |
449 | |
450 | c = dbit / BYTESIZ; |
451 | dirb = c / DBLKSIZ; |
452 | |
453 | if (dirb != db->dirbno) { |
98627ae8 |
454 | int got; |
463ee0b2 |
455 | if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0 |
98627ae8 |
456 | || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0) |
463ee0b2 |
457 | return 0; |
98627ae8 |
458 | if (got==0) |
459 | memset(db->dirbuf,0,DBLKSIZ); |
463ee0b2 |
460 | db->dirbno = dirb; |
461 | |
462 | debug(("dir read: %d\n", dirb)); |
463 | } |
464 | |
465 | return db->dirbuf[c % DBLKSIZ] & (1 << dbit % BYTESIZ); |
466 | } |
467 | |
468 | static int |
f0f333f4 |
469 | setdbit(register DBM *db, register long int dbit) |
463ee0b2 |
470 | { |
471 | register long c; |
472 | register long dirb; |
473 | |
474 | c = dbit / BYTESIZ; |
475 | dirb = c / DBLKSIZ; |
476 | |
477 | if (dirb != db->dirbno) { |
98627ae8 |
478 | int got; |
463ee0b2 |
479 | if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0 |
98627ae8 |
480 | || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0) |
463ee0b2 |
481 | return 0; |
98627ae8 |
482 | if (got==0) |
483 | memset(db->dirbuf,0,DBLKSIZ); |
463ee0b2 |
484 | db->dirbno = dirb; |
485 | |
486 | debug(("dir read: %d\n", dirb)); |
487 | } |
488 | |
489 | db->dirbuf[c % DBLKSIZ] |= (1 << dbit % BYTESIZ); |
490 | |
98627ae8 |
491 | #if 0 |
463ee0b2 |
492 | if (dbit >= db->maxbno) |
493 | db->maxbno += DBLKSIZ * BYTESIZ; |
98627ae8 |
494 | #else |
495 | if (OFF_DIR((dirb+1))*BYTESIZ > db->maxbno) |
496 | db->maxbno=OFF_DIR((dirb+1))*BYTESIZ; |
497 | #endif |
463ee0b2 |
498 | |
499 | if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0 |
500 | || write(db->dirf, db->dirbuf, DBLKSIZ) < 0) |
501 | return 0; |
502 | |
503 | return 1; |
504 | } |
505 | |
506 | /* |
507 | * getnext - get the next key in the page, and if done with |
508 | * the page, try the next page in sequence |
509 | */ |
510 | static datum |
f0f333f4 |
511 | getnext(register DBM *db) |
463ee0b2 |
512 | { |
513 | datum key; |
514 | |
515 | for (;;) { |
516 | db->keyptr++; |
517 | key = getnkey(db->pagbuf, db->keyptr); |
518 | if (key.dptr != NULL) |
519 | return key; |
520 | /* |
521 | * we either run out, or there is nothing on this page.. |
522 | * try the next one... If we lost our position on the |
523 | * file, we will have to seek. |
524 | */ |
525 | db->keyptr = 0; |
526 | if (db->pagbno != db->blkptr++) |
527 | if (lseek(db->pagf, OFF_PAG(db->blkptr), SEEK_SET) < 0) |
528 | break; |
529 | db->pagbno = db->blkptr; |
530 | if (read(db->pagf, db->pagbuf, PBLKSIZ) <= 0) |
531 | break; |
532 | if (!chkpage(db->pagbuf)) |
533 | break; |
534 | } |
535 | |
536 | return ioerr(db), nullitem; |
537 | } |
85e6fe83 |
538 | |