Re: [PATCH regexec.c] lookahead for REF, MINMOD, PLUS, CURLY*
[p5sagit/p5-mst-13.2.git] / ext / SDBM_File / sdbm / sdbm.c
CommitLineData
463ee0b2 1/*
2 * sdbm - ndbm work-alike hashed database library
3 * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
4 * author: oz@nexus.yorku.ca
5 * status: public domain.
6 *
7 * core routines
8 */
9
17f28c40 10#include "INTERN.h"
85e6fe83 11#include "config.h"
4f63d024 12#ifdef WIN32
13#include "io.h"
14#endif
463ee0b2 15#include "sdbm.h"
16#include "tune.h"
17#include "pair.h"
18
85e6fe83 19#ifdef I_FCNTL
20# include <fcntl.h>
463ee0b2 21#endif
85e6fe83 22#ifdef I_SYS_FILE
23# include <sys/file.h>
463ee0b2 24#endif
25
85e6fe83 26#ifdef I_STRING
27# include <string.h>
28#else
29# include <strings.h>
463ee0b2 30#endif
31
32/*
33 * externals
34 */
137443ea 35#ifndef WIN32
463ee0b2 36#ifndef sun
37extern int errno;
38#endif
a5b1c163 39#endif
463ee0b2 40
85e6fe83 41extern Malloc_t malloc proto((MEM_SIZE));
851efeba 42extern Free_t free proto((Malloc_t));
bf0c440f 43
463ee0b2 44/*
45 * forward
46 */
47static int getdbit proto((DBM *, long));
48static int setdbit proto((DBM *, long));
49static int getpage proto((DBM *, long));
50static datum getnext proto((DBM *));
51static int makroom proto((DBM *, long, int));
52
53/*
54 * useful macros
55 */
56#define bad(x) ((x).dptr == NULL || (x).dsize < 0)
57#define exhash(item) sdbm_hash((item).dptr, (item).dsize)
58#define ioerr(db) ((db)->flags |= DBM_IOERR)
59
60#define OFF_PAG(off) (long) (off) * PBLKSIZ
61#define OFF_DIR(off) (long) (off) * DBLKSIZ
62
63static long masks[] = {
64 000000000000, 000000000001, 000000000003, 000000000007,
65 000000000017, 000000000037, 000000000077, 000000000177,
66 000000000377, 000000000777, 000000001777, 000000003777,
67 000000007777, 000000017777, 000000037777, 000000077777,
68 000000177777, 000000377777, 000000777777, 000001777777,
69 000003777777, 000007777777, 000017777777, 000037777777,
70 000077777777, 000177777777, 000377777777, 000777777777,
71 001777777777, 003777777777, 007777777777, 017777777777
72};
73
463ee0b2 74DBM *
f0f333f4 75sdbm_open(register char *file, register int flags, register int mode)
463ee0b2 76{
77 register DBM *db;
78 register char *dirname;
79 register char *pagname;
80 register int n;
81
82 if (file == NULL || !*file)
83 return errno = EINVAL, (DBM *) NULL;
84/*
85 * need space for two seperate filenames
86 */
87 n = strlen(file) * 2 + strlen(DIRFEXT) + strlen(PAGFEXT) + 2;
88
f0f333f4 89 if ((dirname = (char *) malloc((unsigned) n)) == NULL)
463ee0b2 90 return errno = ENOMEM, (DBM *) NULL;
91/*
92 * build the file names
93 */
94 dirname = strcat(strcpy(dirname, file), DIRFEXT);
95 pagname = strcpy(dirname + strlen(dirname) + 1, file);
96 pagname = strcat(pagname, PAGFEXT);
97
98 db = sdbm_prep(dirname, pagname, flags, mode);
99 free((char *) dirname);
100 return db;
101}
102
103DBM *
f0f333f4 104sdbm_prep(char *dirname, char *pagname, int flags, int mode)
463ee0b2 105{
106 register DBM *db;
107 struct stat dstat;
108
109 if ((db = (DBM *) malloc(sizeof(DBM))) == NULL)
110 return errno = ENOMEM, (DBM *) NULL;
111
112 db->flags = 0;
113 db->hmask = 0;
114 db->blkptr = 0;
115 db->keyptr = 0;
116/*
117 * adjust user flags so that WRONLY becomes RDWR,
118 * as required by this package. Also set our internal
119 * flag for RDONLY if needed.
120 */
121 if (flags & O_WRONLY)
122 flags = (flags & ~O_WRONLY) | O_RDWR;
123
124 else if ((flags & 03) == O_RDONLY)
125 db->flags = DBM_RDONLY;
126/*
127 * open the files in sequence, and stat the dirfile.
128 * If we fail anywhere, undo everything, return NULL.
129 */
1761cee5 130#if defined(OS2) || defined(MSDOS) || defined(WIN32) || defined(__CYGWIN__)
4633a7c4 131 flags |= O_BINARY;
132# endif
463ee0b2 133 if ((db->pagf = open(pagname, flags, mode)) > -1) {
134 if ((db->dirf = open(dirname, flags, mode)) > -1) {
135/*
136 * need the dirfile size to establish max bit number.
137 */
138 if (fstat(db->dirf, &dstat) == 0) {
139/*
140 * zero size: either a fresh database, or one with a single,
141 * unsplit data page: dirpage is all zeros.
142 */
143 db->dirbno = (!dstat.st_size) ? 0 : -1;
144 db->pagbno = -1;
145 db->maxbno = dstat.st_size * BYTESIZ;
146
147 (void) memset(db->pagbuf, 0, PBLKSIZ);
148 (void) memset(db->dirbuf, 0, DBLKSIZ);
149 /*
150 * success
151 */
152 return db;
153 }
154 (void) close(db->dirf);
155 }
156 (void) close(db->pagf);
157 }
158 free((char *) db);
159 return (DBM *) NULL;
160}
161
162void
f0f333f4 163sdbm_close(register DBM *db)
463ee0b2 164{
165 if (db == NULL)
166 errno = EINVAL;
167 else {
168 (void) close(db->dirf);
169 (void) close(db->pagf);
170 free((char *) db);
171 }
172}
173
174datum
f0f333f4 175sdbm_fetch(register DBM *db, datum key)
463ee0b2 176{
177 if (db == NULL || bad(key))
178 return errno = EINVAL, nullitem;
179
180 if (getpage(db, exhash(key)))
181 return getpair(db->pagbuf, key);
182
183 return ioerr(db), nullitem;
184}
185
186int
f4b9d880 187sdbm_exists(register DBM *db, datum key)
188{
189 if (db == NULL || bad(key))
190 return errno = EINVAL, -1;
191
192 if (getpage(db, exhash(key)))
193 return exipair(db->pagbuf, key);
194
195 return ioerr(db), -1;
196}
197
198int
f0f333f4 199sdbm_delete(register DBM *db, datum key)
463ee0b2 200{
201 if (db == NULL || bad(key))
202 return errno = EINVAL, -1;
203 if (sdbm_rdonly(db))
204 return errno = EPERM, -1;
205
206 if (getpage(db, exhash(key))) {
207 if (!delpair(db->pagbuf, key))
208 return -1;
209/*
210 * update the page file
211 */
212 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
213 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
214 return ioerr(db), -1;
215
216 return 0;
217 }
218
219 return ioerr(db), -1;
220}
221
222int
f0f333f4 223sdbm_store(register DBM *db, datum key, datum val, int flags)
463ee0b2 224{
225 int need;
226 register long hash;
227
228 if (db == NULL || bad(key))
229 return errno = EINVAL, -1;
230 if (sdbm_rdonly(db))
231 return errno = EPERM, -1;
232
233 need = key.dsize + val.dsize;
234/*
235 * is the pair too big (or too small) for this database ??
236 */
237 if (need < 0 || need > PAIRMAX)
238 return errno = EINVAL, -1;
239
240 if (getpage(db, (hash = exhash(key)))) {
241/*
242 * if we need to replace, delete the key/data pair
243 * first. If it is not there, ignore.
244 */
245 if (flags == DBM_REPLACE)
246 (void) delpair(db->pagbuf, key);
247#ifdef SEEDUPS
248 else if (duppair(db->pagbuf, key))
249 return 1;
250#endif
251/*
252 * if we do not have enough room, we have to split.
253 */
254 if (!fitpair(db->pagbuf, need))
255 if (!makroom(db, hash, need))
256 return ioerr(db), -1;
257/*
258 * we have enough room or split is successful. insert the key,
259 * and update the page file.
260 */
261 (void) putpair(db->pagbuf, key, val);
262
263 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
264 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
265 return ioerr(db), -1;
266 /*
267 * success
268 */
269 return 0;
270 }
271
272 return ioerr(db), -1;
273}
274
275/*
276 * makroom - make room by splitting the overfull page
277 * this routine will attempt to make room for SPLTMAX times before
278 * giving up.
279 */
280static int
f0f333f4 281makroom(register DBM *db, long int hash, int need)
463ee0b2 282{
283 long newp;
284 char twin[PBLKSIZ];
f6bbbfc7 285#if defined(DOSISH) || defined(WIN32)
286 char zer[PBLKSIZ];
287 long oldtail;
288#endif
463ee0b2 289 char *pag = db->pagbuf;
f0f333f4 290 char *New = twin;
463ee0b2 291 register int smax = SPLTMAX;
292
293 do {
294/*
295 * split the current page
296 */
f0f333f4 297 (void) splpage(pag, New, db->hmask + 1);
463ee0b2 298/*
299 * address of the new page
300 */
301 newp = (hash & db->hmask) | (db->hmask + 1);
302
303/*
304 * write delay, read avoidence/cache shuffle:
305 * select the page for incoming pair: if key is to go to the new page,
306 * write out the previous one, and copy the new one over, thus making
307 * it the current page. If not, simply write the new page, and we are
308 * still looking at the page of interest. current page is not updated
309 * here, as sdbm_store will do so, after it inserts the incoming pair.
310 */
f6bbbfc7 311
312#if defined(DOSISH) || defined(WIN32)
313 /*
314 * Fill hole with 0 if made it.
315 * (hole is NOT read as 0)
316 */
317 oldtail = lseek(db->pagf, 0L, SEEK_END);
318 memset(zer, 0, PBLKSIZ);
319 while (OFF_PAG(newp) > oldtail) {
320 if (lseek(db->pagf, 0L, SEEK_END) < 0 ||
321 write(db->pagf, zer, PBLKSIZ) < 0) {
322
323 return 0;
324 }
325 oldtail += PBLKSIZ;
326 }
327#endif
463ee0b2 328 if (hash & (db->hmask + 1)) {
329 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
330 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
331 return 0;
332 db->pagbno = newp;
f0f333f4 333 (void) memcpy(pag, New, PBLKSIZ);
463ee0b2 334 }
335 else if (lseek(db->pagf, OFF_PAG(newp), SEEK_SET) < 0
f0f333f4 336 || write(db->pagf, New, PBLKSIZ) < 0)
463ee0b2 337 return 0;
338
339 if (!setdbit(db, db->curbit))
340 return 0;
341/*
342 * see if we have enough room now
343 */
344 if (fitpair(pag, need))
345 return 1;
346/*
347 * try again... update curbit and hmask as getpage would have
348 * done. because of our update of the current page, we do not
349 * need to read in anything. BUT we have to write the current
350 * [deferred] page out, as the window of failure is too great.
351 */
352 db->curbit = 2 * db->curbit +
353 ((hash & (db->hmask + 1)) ? 2 : 1);
354 db->hmask |= db->hmask + 1;
355
356 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
357 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
358 return 0;
359
360 } while (--smax);
361/*
362 * if we are here, this is real bad news. After SPLTMAX splits,
363 * we still cannot fit the key. say goodnight.
364 */
365#ifdef BADMESS
366 (void) write(2, "sdbm: cannot insert after SPLTMAX attempts.\n", 44);
367#endif
368 return 0;
369
370}
371
372/*
373 * the following two routines will break if
374 * deletions aren't taken into account. (ndbm bug)
375 */
376datum
f0f333f4 377sdbm_firstkey(register DBM *db)
463ee0b2 378{
379 if (db == NULL)
380 return errno = EINVAL, nullitem;
381/*
382 * start at page 0
383 */
384 if (lseek(db->pagf, OFF_PAG(0), SEEK_SET) < 0
385 || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
386 return ioerr(db), nullitem;
387 db->pagbno = 0;
388 db->blkptr = 0;
389 db->keyptr = 0;
390
391 return getnext(db);
392}
393
394datum
f0f333f4 395sdbm_nextkey(register DBM *db)
463ee0b2 396{
397 if (db == NULL)
398 return errno = EINVAL, nullitem;
399 return getnext(db);
400}
401
402/*
403 * all important binary trie traversal
404 */
405static int
f0f333f4 406getpage(register DBM *db, register long int hash)
463ee0b2 407{
408 register int hbit;
409 register long dbit;
410 register long pagb;
411
412 dbit = 0;
413 hbit = 0;
414 while (dbit < db->maxbno && getdbit(db, dbit))
415 dbit = 2 * dbit + ((hash & (1 << hbit++)) ? 2 : 1);
416
417 debug(("dbit: %d...", dbit));
418
419 db->curbit = dbit;
420 db->hmask = masks[hbit];
421
422 pagb = hash & db->hmask;
423/*
424 * see if the block we need is already in memory.
425 * note: this lookaside cache has about 10% hit rate.
426 */
427 if (pagb != db->pagbno) {
428/*
429 * note: here, we assume a "hole" is read as 0s.
430 * if not, must zero pagbuf first.
431 */
432 if (lseek(db->pagf, OFF_PAG(pagb), SEEK_SET) < 0
433 || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
434 return 0;
435 if (!chkpage(db->pagbuf))
436 return 0;
437 db->pagbno = pagb;
438
439 debug(("pag read: %d\n", pagb));
440 }
441 return 1;
442}
443
444static int
f0f333f4 445getdbit(register DBM *db, register long int dbit)
463ee0b2 446{
447 register long c;
448 register long dirb;
449
450 c = dbit / BYTESIZ;
451 dirb = c / DBLKSIZ;
452
453 if (dirb != db->dirbno) {
98627ae8 454 int got;
463ee0b2 455 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
98627ae8 456 || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0)
463ee0b2 457 return 0;
98627ae8 458 if (got==0)
459 memset(db->dirbuf,0,DBLKSIZ);
463ee0b2 460 db->dirbno = dirb;
461
462 debug(("dir read: %d\n", dirb));
463 }
464
465 return db->dirbuf[c % DBLKSIZ] & (1 << dbit % BYTESIZ);
466}
467
468static int
f0f333f4 469setdbit(register DBM *db, register long int dbit)
463ee0b2 470{
471 register long c;
472 register long dirb;
473
474 c = dbit / BYTESIZ;
475 dirb = c / DBLKSIZ;
476
477 if (dirb != db->dirbno) {
98627ae8 478 int got;
463ee0b2 479 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
98627ae8 480 || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0)
463ee0b2 481 return 0;
98627ae8 482 if (got==0)
483 memset(db->dirbuf,0,DBLKSIZ);
463ee0b2 484 db->dirbno = dirb;
485
486 debug(("dir read: %d\n", dirb));
487 }
488
489 db->dirbuf[c % DBLKSIZ] |= (1 << dbit % BYTESIZ);
490
98627ae8 491#if 0
463ee0b2 492 if (dbit >= db->maxbno)
493 db->maxbno += DBLKSIZ * BYTESIZ;
98627ae8 494#else
495 if (OFF_DIR((dirb+1))*BYTESIZ > db->maxbno)
496 db->maxbno=OFF_DIR((dirb+1))*BYTESIZ;
497#endif
463ee0b2 498
499 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
500 || write(db->dirf, db->dirbuf, DBLKSIZ) < 0)
501 return 0;
502
503 return 1;
504}
505
506/*
507 * getnext - get the next key in the page, and if done with
508 * the page, try the next page in sequence
509 */
510static datum
f0f333f4 511getnext(register DBM *db)
463ee0b2 512{
513 datum key;
514
515 for (;;) {
516 db->keyptr++;
517 key = getnkey(db->pagbuf, db->keyptr);
518 if (key.dptr != NULL)
519 return key;
520/*
521 * we either run out, or there is nothing on this page..
522 * try the next one... If we lost our position on the
523 * file, we will have to seek.
524 */
525 db->keyptr = 0;
526 if (db->pagbno != db->blkptr++)
527 if (lseek(db->pagf, OFF_PAG(db->blkptr), SEEK_SET) < 0)
528 break;
529 db->pagbno = db->blkptr;
530 if (read(db->pagf, db->pagbuf, PBLKSIZ) <= 0)
531 break;
532 if (!chkpage(db->pagbuf))
533 break;
534 }
535
536 return ioerr(db), nullitem;
537}
85e6fe83 538