Once again syncing after too long an absence
[p5sagit/p5-mst-13.2.git] / ext / SDBM_File / sdbm / sdbm.c
CommitLineData
463ee0b2 1/*
2 * sdbm - ndbm work-alike hashed database library
3 * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
4 * author: oz@nexus.yorku.ca
5 * status: public domain.
6 *
7 * core routines
8 */
9
17f28c40 10#include "INTERN.h"
85e6fe83 11#include "config.h"
4f63d024 12#ifdef WIN32
13#include "io.h"
14#endif
463ee0b2 15#include "sdbm.h"
16#include "tune.h"
17#include "pair.h"
18
85e6fe83 19#ifdef I_FCNTL
20# include <fcntl.h>
463ee0b2 21#endif
85e6fe83 22#ifdef I_SYS_FILE
23# include <sys/file.h>
463ee0b2 24#endif
25
85e6fe83 26#ifdef I_STRING
27# include <string.h>
28#else
29# include <strings.h>
463ee0b2 30#endif
31
32/*
33 * externals
34 */
137443ea 35#ifndef WIN32
463ee0b2 36#ifndef sun
37extern int errno;
38#endif
39
85e6fe83 40extern Malloc_t malloc proto((MEM_SIZE));
851efeba 41extern Free_t free proto((Malloc_t));
bf0c440f 42
137443ea 43#endif
463ee0b2 44
45/*
46 * forward
47 */
48static int getdbit proto((DBM *, long));
49static int setdbit proto((DBM *, long));
50static int getpage proto((DBM *, long));
51static datum getnext proto((DBM *));
52static int makroom proto((DBM *, long, int));
53
54/*
55 * useful macros
56 */
57#define bad(x) ((x).dptr == NULL || (x).dsize < 0)
58#define exhash(item) sdbm_hash((item).dptr, (item).dsize)
59#define ioerr(db) ((db)->flags |= DBM_IOERR)
60
61#define OFF_PAG(off) (long) (off) * PBLKSIZ
62#define OFF_DIR(off) (long) (off) * DBLKSIZ
63
64static long masks[] = {
65 000000000000, 000000000001, 000000000003, 000000000007,
66 000000000017, 000000000037, 000000000077, 000000000177,
67 000000000377, 000000000777, 000000001777, 000000003777,
68 000000007777, 000000017777, 000000037777, 000000077777,
69 000000177777, 000000377777, 000000777777, 000001777777,
70 000003777777, 000007777777, 000017777777, 000037777777,
71 000077777777, 000177777777, 000377777777, 000777777777,
72 001777777777, 003777777777, 007777777777, 017777777777
73};
74
463ee0b2 75DBM *
f0f333f4 76sdbm_open(register char *file, register int flags, register int mode)
463ee0b2 77{
78 register DBM *db;
79 register char *dirname;
80 register char *pagname;
81 register int n;
82
83 if (file == NULL || !*file)
84 return errno = EINVAL, (DBM *) NULL;
85/*
86 * need space for two seperate filenames
87 */
88 n = strlen(file) * 2 + strlen(DIRFEXT) + strlen(PAGFEXT) + 2;
89
f0f333f4 90 if ((dirname = (char *) malloc((unsigned) n)) == NULL)
463ee0b2 91 return errno = ENOMEM, (DBM *) NULL;
92/*
93 * build the file names
94 */
95 dirname = strcat(strcpy(dirname, file), DIRFEXT);
96 pagname = strcpy(dirname + strlen(dirname) + 1, file);
97 pagname = strcat(pagname, PAGFEXT);
98
99 db = sdbm_prep(dirname, pagname, flags, mode);
100 free((char *) dirname);
101 return db;
102}
103
104DBM *
f0f333f4 105sdbm_prep(char *dirname, char *pagname, int flags, int mode)
463ee0b2 106{
107 register DBM *db;
108 struct stat dstat;
109
110 if ((db = (DBM *) malloc(sizeof(DBM))) == NULL)
111 return errno = ENOMEM, (DBM *) NULL;
112
113 db->flags = 0;
114 db->hmask = 0;
115 db->blkptr = 0;
116 db->keyptr = 0;
117/*
118 * adjust user flags so that WRONLY becomes RDWR,
119 * as required by this package. Also set our internal
120 * flag for RDONLY if needed.
121 */
122 if (flags & O_WRONLY)
123 flags = (flags & ~O_WRONLY) | O_RDWR;
124
125 else if ((flags & 03) == O_RDONLY)
126 db->flags = DBM_RDONLY;
127/*
128 * open the files in sequence, and stat the dirfile.
129 * If we fail anywhere, undo everything, return NULL.
130 */
1761cee5 131#if defined(OS2) || defined(MSDOS) || defined(WIN32) || defined(__CYGWIN__)
4633a7c4 132 flags |= O_BINARY;
133# endif
463ee0b2 134 if ((db->pagf = open(pagname, flags, mode)) > -1) {
135 if ((db->dirf = open(dirname, flags, mode)) > -1) {
136/*
137 * need the dirfile size to establish max bit number.
138 */
139 if (fstat(db->dirf, &dstat) == 0) {
140/*
141 * zero size: either a fresh database, or one with a single,
142 * unsplit data page: dirpage is all zeros.
143 */
144 db->dirbno = (!dstat.st_size) ? 0 : -1;
145 db->pagbno = -1;
146 db->maxbno = dstat.st_size * BYTESIZ;
147
148 (void) memset(db->pagbuf, 0, PBLKSIZ);
149 (void) memset(db->dirbuf, 0, DBLKSIZ);
150 /*
151 * success
152 */
153 return db;
154 }
155 (void) close(db->dirf);
156 }
157 (void) close(db->pagf);
158 }
159 free((char *) db);
160 return (DBM *) NULL;
161}
162
163void
f0f333f4 164sdbm_close(register DBM *db)
463ee0b2 165{
166 if (db == NULL)
167 errno = EINVAL;
168 else {
169 (void) close(db->dirf);
170 (void) close(db->pagf);
171 free((char *) db);
172 }
173}
174
175datum
f0f333f4 176sdbm_fetch(register DBM *db, datum key)
463ee0b2 177{
178 if (db == NULL || bad(key))
179 return errno = EINVAL, nullitem;
180
181 if (getpage(db, exhash(key)))
182 return getpair(db->pagbuf, key);
183
184 return ioerr(db), nullitem;
185}
186
187int
f4b9d880 188sdbm_exists(register DBM *db, datum key)
189{
190 if (db == NULL || bad(key))
191 return errno = EINVAL, -1;
192
193 if (getpage(db, exhash(key)))
194 return exipair(db->pagbuf, key);
195
196 return ioerr(db), -1;
197}
198
199int
f0f333f4 200sdbm_delete(register DBM *db, datum key)
463ee0b2 201{
202 if (db == NULL || bad(key))
203 return errno = EINVAL, -1;
204 if (sdbm_rdonly(db))
205 return errno = EPERM, -1;
206
207 if (getpage(db, exhash(key))) {
208 if (!delpair(db->pagbuf, key))
209 return -1;
210/*
211 * update the page file
212 */
213 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
214 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
215 return ioerr(db), -1;
216
217 return 0;
218 }
219
220 return ioerr(db), -1;
221}
222
223int
f0f333f4 224sdbm_store(register DBM *db, datum key, datum val, int flags)
463ee0b2 225{
226 int need;
227 register long hash;
228
229 if (db == NULL || bad(key))
230 return errno = EINVAL, -1;
231 if (sdbm_rdonly(db))
232 return errno = EPERM, -1;
233
234 need = key.dsize + val.dsize;
235/*
236 * is the pair too big (or too small) for this database ??
237 */
238 if (need < 0 || need > PAIRMAX)
239 return errno = EINVAL, -1;
240
241 if (getpage(db, (hash = exhash(key)))) {
242/*
243 * if we need to replace, delete the key/data pair
244 * first. If it is not there, ignore.
245 */
246 if (flags == DBM_REPLACE)
247 (void) delpair(db->pagbuf, key);
248#ifdef SEEDUPS
249 else if (duppair(db->pagbuf, key))
250 return 1;
251#endif
252/*
253 * if we do not have enough room, we have to split.
254 */
255 if (!fitpair(db->pagbuf, need))
256 if (!makroom(db, hash, need))
257 return ioerr(db), -1;
258/*
259 * we have enough room or split is successful. insert the key,
260 * and update the page file.
261 */
262 (void) putpair(db->pagbuf, key, val);
263
264 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
265 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
266 return ioerr(db), -1;
267 /*
268 * success
269 */
270 return 0;
271 }
272
273 return ioerr(db), -1;
274}
275
276/*
277 * makroom - make room by splitting the overfull page
278 * this routine will attempt to make room for SPLTMAX times before
279 * giving up.
280 */
281static int
f0f333f4 282makroom(register DBM *db, long int hash, int need)
463ee0b2 283{
284 long newp;
285 char twin[PBLKSIZ];
0e06870b 286#if defined(DOSISH) || defined(WIN32)
287 char zer[PBLKSIZ];
288 long oldtail;
289#endif
463ee0b2 290 char *pag = db->pagbuf;
f0f333f4 291 char *New = twin;
463ee0b2 292 register int smax = SPLTMAX;
293
294 do {
295/*
296 * split the current page
297 */
f0f333f4 298 (void) splpage(pag, New, db->hmask + 1);
463ee0b2 299/*
300 * address of the new page
301 */
302 newp = (hash & db->hmask) | (db->hmask + 1);
303
304/*
305 * write delay, read avoidence/cache shuffle:
306 * select the page for incoming pair: if key is to go to the new page,
307 * write out the previous one, and copy the new one over, thus making
308 * it the current page. If not, simply write the new page, and we are
309 * still looking at the page of interest. current page is not updated
310 * here, as sdbm_store will do so, after it inserts the incoming pair.
311 */
0e06870b 312
313#if defined(DOSISH) || defined(WIN32)
314 /*
315 * Fill hole with 0 if made it.
316 * (hole is NOT read as 0)
317 */
318 oldtail = lseek(db->pagf, 0L, SEEK_END);
319 memset(zer, 0, PBLKSIZ);
320 while (OFF_PAG(newp) > oldtail) {
321 if (lseek(db->pagf, 0L, SEEK_END) < 0 ||
322 write(db->pagf, zer, PBLKSIZ) < 0) {
323
324 return 0;
325 }
326 oldtail += PBLKSIZ;
327 }
328#endif
463ee0b2 329 if (hash & (db->hmask + 1)) {
330 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
331 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
332 return 0;
333 db->pagbno = newp;
f0f333f4 334 (void) memcpy(pag, New, PBLKSIZ);
463ee0b2 335 }
336 else if (lseek(db->pagf, OFF_PAG(newp), SEEK_SET) < 0
f0f333f4 337 || write(db->pagf, New, PBLKSIZ) < 0)
463ee0b2 338 return 0;
339
340 if (!setdbit(db, db->curbit))
341 return 0;
342/*
343 * see if we have enough room now
344 */
345 if (fitpair(pag, need))
346 return 1;
347/*
348 * try again... update curbit and hmask as getpage would have
349 * done. because of our update of the current page, we do not
350 * need to read in anything. BUT we have to write the current
351 * [deferred] page out, as the window of failure is too great.
352 */
353 db->curbit = 2 * db->curbit +
354 ((hash & (db->hmask + 1)) ? 2 : 1);
355 db->hmask |= db->hmask + 1;
356
357 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
358 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
359 return 0;
360
361 } while (--smax);
362/*
363 * if we are here, this is real bad news. After SPLTMAX splits,
364 * we still cannot fit the key. say goodnight.
365 */
366#ifdef BADMESS
367 (void) write(2, "sdbm: cannot insert after SPLTMAX attempts.\n", 44);
368#endif
369 return 0;
370
371}
372
373/*
374 * the following two routines will break if
375 * deletions aren't taken into account. (ndbm bug)
376 */
377datum
f0f333f4 378sdbm_firstkey(register DBM *db)
463ee0b2 379{
380 if (db == NULL)
381 return errno = EINVAL, nullitem;
382/*
383 * start at page 0
384 */
385 if (lseek(db->pagf, OFF_PAG(0), SEEK_SET) < 0
386 || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
387 return ioerr(db), nullitem;
388 db->pagbno = 0;
389 db->blkptr = 0;
390 db->keyptr = 0;
391
392 return getnext(db);
393}
394
395datum
f0f333f4 396sdbm_nextkey(register DBM *db)
463ee0b2 397{
398 if (db == NULL)
399 return errno = EINVAL, nullitem;
400 return getnext(db);
401}
402
403/*
404 * all important binary trie traversal
405 */
406static int
f0f333f4 407getpage(register DBM *db, register long int hash)
463ee0b2 408{
409 register int hbit;
410 register long dbit;
411 register long pagb;
412
413 dbit = 0;
414 hbit = 0;
415 while (dbit < db->maxbno && getdbit(db, dbit))
416 dbit = 2 * dbit + ((hash & (1 << hbit++)) ? 2 : 1);
417
418 debug(("dbit: %d...", dbit));
419
420 db->curbit = dbit;
421 db->hmask = masks[hbit];
422
423 pagb = hash & db->hmask;
424/*
425 * see if the block we need is already in memory.
426 * note: this lookaside cache has about 10% hit rate.
427 */
428 if (pagb != db->pagbno) {
429/*
430 * note: here, we assume a "hole" is read as 0s.
431 * if not, must zero pagbuf first.
432 */
433 if (lseek(db->pagf, OFF_PAG(pagb), SEEK_SET) < 0
434 || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
435 return 0;
436 if (!chkpage(db->pagbuf))
437 return 0;
438 db->pagbno = pagb;
439
440 debug(("pag read: %d\n", pagb));
441 }
442 return 1;
443}
444
445static int
f0f333f4 446getdbit(register DBM *db, register long int dbit)
463ee0b2 447{
448 register long c;
449 register long dirb;
450
451 c = dbit / BYTESIZ;
452 dirb = c / DBLKSIZ;
453
454 if (dirb != db->dirbno) {
98627ae8 455 int got;
463ee0b2 456 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
98627ae8 457 || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0)
463ee0b2 458 return 0;
98627ae8 459 if (got==0)
460 memset(db->dirbuf,0,DBLKSIZ);
463ee0b2 461 db->dirbno = dirb;
462
463 debug(("dir read: %d\n", dirb));
464 }
465
466 return db->dirbuf[c % DBLKSIZ] & (1 << dbit % BYTESIZ);
467}
468
469static int
f0f333f4 470setdbit(register DBM *db, register long int dbit)
463ee0b2 471{
472 register long c;
473 register long dirb;
474
475 c = dbit / BYTESIZ;
476 dirb = c / DBLKSIZ;
477
478 if (dirb != db->dirbno) {
98627ae8 479 int got;
463ee0b2 480 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
98627ae8 481 || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0)
463ee0b2 482 return 0;
98627ae8 483 if (got==0)
484 memset(db->dirbuf,0,DBLKSIZ);
463ee0b2 485 db->dirbno = dirb;
486
487 debug(("dir read: %d\n", dirb));
488 }
489
490 db->dirbuf[c % DBLKSIZ] |= (1 << dbit % BYTESIZ);
491
98627ae8 492#if 0
463ee0b2 493 if (dbit >= db->maxbno)
494 db->maxbno += DBLKSIZ * BYTESIZ;
98627ae8 495#else
496 if (OFF_DIR((dirb+1))*BYTESIZ > db->maxbno)
497 db->maxbno=OFF_DIR((dirb+1))*BYTESIZ;
498#endif
463ee0b2 499
500 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
501 || write(db->dirf, db->dirbuf, DBLKSIZ) < 0)
502 return 0;
503
504 return 1;
505}
506
507/*
508 * getnext - get the next key in the page, and if done with
509 * the page, try the next page in sequence
510 */
511static datum
f0f333f4 512getnext(register DBM *db)
463ee0b2 513{
514 datum key;
515
516 for (;;) {
517 db->keyptr++;
518 key = getnkey(db->pagbuf, db->keyptr);
519 if (key.dptr != NULL)
520 return key;
521/*
522 * we either run out, or there is nothing on this page..
523 * try the next one... If we lost our position on the
524 * file, we will have to seek.
525 */
526 db->keyptr = 0;
527 if (db->pagbno != db->blkptr++)
528 if (lseek(db->pagf, OFF_PAG(db->blkptr), SEEK_SET) < 0)
529 break;
530 db->pagbno = db->blkptr;
531 if (read(db->pagf, db->pagbuf, PBLKSIZ) <= 0)
532 break;
533 if (!chkpage(db->pagbuf))
534 break;
535 }
536
537 return ioerr(db), nullitem;
538}
85e6fe83 539