Ruby  2.0.0p247(2013-06-27revision41674)
ext/sdbm/_sdbm.c
Go to the documentation of this file.
00001 /*
00002  * sdbm - ndbm work-alike hashed database library
00003  * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
00004  * author: oz@nexus.yorku.ca
00005  * status: public domain.
00006  *
00007  * core routines
00008  */
00009 
00010 #include "ruby/config.h"
00011 #include "ruby/defines.h"
00012 
00013 #ifdef HAVE_UNISTD_H
00014 #include <unistd.h>
00015 #endif
00016 
00017 #include "sdbm.h"
00018 
00019 /*
00020  * sdbm - ndbm work-alike hashed database library
00021  * tuning and portability constructs [not nearly enough]
00022  * author: oz@nexus.yorku.ca
00023  */
00024 
00025 #define BYTESIZ         8
00026 
00027 #ifdef BSD42
00028 #define SEEK_SET        L_SET
00029 #define memset(s,c,n)   bzero((s), (n))         /* only when c is zero */
00030 #define memcpy(s1,s2,n) bcopy((s2), (s1), (n))
00031 #define memcmp(s1,s2,n) bcmp((s1),(s2),(n))
00032 #endif
00033 
00034 /*
00035  * important tuning parms (hah)
00036  */
00037 
00038 #ifndef SEEDUPS
00039 #define SEEDUPS 1       /* always detect duplicates */
00040 #endif
00041 #ifndef BADMESS
00042 #define BADMESS 1       /* generate a message for worst case:
00043                            cannot make room after SPLTMAX splits */
00044 #endif
00045 
00046 /*
00047  * misc
00048  */
00049 #ifdef DEBUG
00050 #define debug(x)        printf x
00051 #else
00052 #define debug(x)
00053 #endif
00054 
00055 #ifdef BIG_E
00056 #define GET_SHORT(p, i) (((unsigned)((unsigned char *)(p))[(i)*2] << 8) + (((unsigned char *)(p))[(i)*2 + 1]))
00057 #define PUT_SHORT(p, i, s) (((unsigned char *)(p))[(i)*2] = (unsigned char)((s) >> 8), ((unsigned char *)(p))[(i)*2 + 1] = (unsigned char)(s))
00058 #else
00059 #define GET_SHORT(p, i) ((p)[(i)])
00060 #define PUT_SHORT(p, i, s)      ((p)[(i)] = (s))
00061 #endif
00062 
00063 /*#include "pair.h"*/
00064 static int   fitpair proto((char *, int));
00065 static void  putpair proto((char *, datum, datum));
00066 static datum getpair proto((char *, datum));
00067 static int   delpair proto((char *, datum));
00068 static int   chkpage proto((char *));
00069 static datum getnkey proto((char *, int));
00070 static void  splpage proto((char *, char *, long));
00071 #if SEEDUPS
00072 static int   duppair proto((char *, datum));
00073 #endif
00074 
00075 #include <stdio.h>
00076 #include <stdlib.h>
00077 #ifdef DOSISH
00078 #include <io.h>
00079 #endif
00080 #include <sys/types.h>
00081 #include <sys/stat.h>
00082 #ifdef BSD42
00083 #include <sys/file.h>
00084 #else
00085 #include <fcntl.h>
00086 /*#include <memory.h>*/
00087 #endif
00088 #ifndef O_BINARY
00089 #define O_BINARY        0
00090 #endif
00091 
00092 #include <errno.h>
00093 #ifndef EPERM
00094 #define EPERM   EACCES
00095 #endif
00096 #include <string.h>
00097 
00098 #ifdef __STDC__
00099 #include <stddef.h>
00100 #endif
00101 
00102 #ifndef NULL
00103 #define NULL    0
00104 #endif
00105 
00106 /*
00107  * externals
00108  */
00109 #if !defined(__sun) && !defined(_WIN32) && !defined(__CYGWIN__) && !defined(errno)
00110 extern int errno;
00111 #endif
00112 
00113 /*
00114  * forward
00115  */
00116 static int getdbit proto((DBM *, long));
00117 static int setdbit proto((DBM *, long));
00118 static int getpage proto((DBM *, long));
00119 static datum getnext proto((DBM *));
00120 static int makroom proto((DBM *, long, int));
00121 
00122 /*
00123  * useful macros
00124  */
00125 #define bad(x)          ((x).dptr == NULL || (x).dsize < 0)
00126 #define exhash(item)    sdbm_hash((item).dptr, (item).dsize)
00127 #define ioerr(db)       ((db)->flags |= DBM_IOERR)
00128 
00129 #define OFF_PAG(off)    (long) (off) * PBLKSIZ
00130 #define OFF_DIR(off)    (long) (off) * DBLKSIZ
00131 
00132 static long masks[] = {
00133         000000000000L, 000000000001L, 000000000003L,
00134         000000000007L, 000000000017L, 000000000037L,
00135         000000000077L, 000000000177L, 000000000377L,
00136         000000000777L, 000000001777L, 000000003777L,
00137         000000007777L, 000000017777L, 000000037777L,
00138         000000077777L, 000000177777L, 000000377777L,
00139         000000777777L, 000001777777L, 000003777777L,
00140         000007777777L, 000017777777L, 000037777777L,
00141         000077777777L, 000177777777L, 000377777777L,
00142         000777777777L, 001777777777L, 003777777777L,
00143         007777777777L, 017777777777L
00144 };
00145 
00146 datum nullitem = {NULL, 0};
00147 
00148 DBM *
00149 sdbm_open(register char *file, register int flags, register int mode)
00150 {
00151         register DBM *db;
00152         register char *dirname;
00153         register char *pagname;
00154         register size_t n;
00155 
00156         if (file == NULL || !*file)
00157                 return errno = EINVAL, (DBM *) NULL;
00158 /*
00159  * need space for two seperate filenames
00160  */
00161         n = strlen(file) * 2 + strlen(DIRFEXT) + strlen(PAGFEXT) + 2;
00162 
00163         if ((dirname = malloc(n)) == NULL)
00164                 return errno = ENOMEM, (DBM *) NULL;
00165 /*
00166  * build the file names
00167  */
00168         dirname = strcat(strcpy(dirname, file), DIRFEXT);
00169         pagname = strcpy(dirname + strlen(dirname) + 1, file);
00170         pagname = strcat(pagname, PAGFEXT);
00171 
00172         db = sdbm_prep(dirname, pagname, flags, mode);
00173         free((char *) dirname);
00174         return db;
00175 }
00176 
00177 static int
00178 fd_set_cloexec(int fd)
00179 {
00180   /* MinGW don't have F_GETFD and FD_CLOEXEC.  [ruby-core:40281] */
00181 #ifdef F_GETFD
00182     int flags, ret;
00183     flags = fcntl(fd, F_GETFD); /* should not fail except EBADF. */
00184     if (flags == -1) {
00185         return -1;
00186     }
00187     if (2 < fd) {
00188         if (!(flags & FD_CLOEXEC)) {
00189             flags |= FD_CLOEXEC;
00190             ret = fcntl(fd, F_SETFD, flags);
00191             if (ret == -1) {
00192                 return -1;
00193             }
00194         }
00195     }
00196 #endif
00197     return 0;
00198 }
00199 
00200 DBM *
00201 sdbm_prep(char *dirname, char *pagname, int flags, int mode)
00202 {
00203         register DBM *db;
00204         struct stat dstat;
00205 
00206         if ((db = (DBM *) malloc(sizeof(DBM))) == NULL)
00207                 return errno = ENOMEM, (DBM *) NULL;
00208 
00209         db->pagf = -1;
00210         db->dirf = -1;
00211         db->flags = 0;
00212         db->hmask = 0;
00213         db->blkptr = 0;
00214         db->keyptr = 0;
00215 /*
00216  * adjust user flags so that WRONLY becomes RDWR,
00217  * as required by this package. Also set our internal
00218  * flag for RDONLY.
00219  */
00220         if (flags & O_WRONLY)
00221                 flags = (flags & ~O_WRONLY) | O_RDWR;
00222         if (flags & O_RDONLY)
00223                 db->flags = DBM_RDONLY;
00224 /*
00225  * open the files in sequence, and stat the dirfile.
00226  * If we fail anywhere, undo everything, return NULL.
00227  */
00228         flags |= O_BINARY;
00229 #ifdef O_CLOEXEC
00230         flags |= O_CLOEXEC;
00231 #endif
00232 
00233         if ((db->pagf = open(pagname, flags, mode)) == -1) goto err;
00234         if (fd_set_cloexec(db->pagf) == -1) goto err;
00235         if ((db->dirf = open(dirname, flags, mode)) == -1) goto err;
00236         if (fd_set_cloexec(db->dirf) == -1) goto err;
00237 /*
00238  * need the dirfile size to establish max bit number.
00239  */
00240         if (fstat(db->dirf, &dstat) == -1) goto err;
00241 /*
00242  * zero size: either a fresh database, or one with a single,
00243  * unsplit data page: dirpage is all zeros.
00244  */
00245         db->dirbno = (!dstat.st_size) ? 0 : -1;
00246         db->pagbno = -1;
00247         db->maxbno = dstat.st_size * (long) BYTESIZ;
00248 
00249         (void) memset(db->pagbuf, 0, PBLKSIZ);
00250         (void) memset(db->dirbuf, 0, DBLKSIZ);
00251 /*
00252  * success
00253  */
00254         return db;
00255 
00256     err:
00257         if (db->pagf != -1)
00258                 (void) close(db->pagf);
00259         if (db->dirf != -1)
00260                 (void) close(db->dirf);
00261         free((char *) db);
00262         return (DBM *) NULL;
00263 }
00264 
00265 void
00266 sdbm_close(register DBM *db)
00267 {
00268         if (db == NULL)
00269                 errno = EINVAL;
00270         else {
00271                 (void) close(db->dirf);
00272                 (void) close(db->pagf);
00273                 free((char *) db);
00274         }
00275 }
00276 
00277 datum
00278 sdbm_fetch(register DBM *db, datum key)
00279 {
00280         if (db == NULL || bad(key))
00281                 return errno = EINVAL, nullitem;
00282 
00283         if (getpage(db, exhash(key)))
00284                 return getpair(db->pagbuf, key);
00285 
00286         return ioerr(db), nullitem;
00287 }
00288 
00289 int
00290 sdbm_delete(register DBM *db, datum key)
00291 {
00292         if (db == NULL || bad(key))
00293                 return errno = EINVAL, -1;
00294         if (sdbm_rdonly(db))
00295                 return errno = EPERM, -1;
00296 
00297         if (getpage(db, exhash(key))) {
00298                 if (!delpair(db->pagbuf, key))
00299                         return -1;
00300 /*
00301  * update the page file
00302  */
00303                 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
00304                     || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
00305                         return ioerr(db), -1;
00306 
00307                 return 0;
00308         }
00309 
00310         return ioerr(db), -1;
00311 }
00312 
00313 int
00314 sdbm_store(register DBM *db, datum key, datum val, int flags)
00315 {
00316         int need;
00317         register long hash;
00318 
00319         if (db == NULL || bad(key))
00320                 return errno = EINVAL, -1;
00321         if (sdbm_rdonly(db))
00322                 return errno = EPERM, -1;
00323 
00324         need = key.dsize + val.dsize;
00325 /*
00326  * is the pair too big (or too small) for this database ??
00327  */
00328         if (need < 0 || need > PAIRMAX)
00329                 return errno = EINVAL, -1;
00330 
00331         if (getpage(db, (hash = exhash(key)))) {
00332 /*
00333  * if we need to replace, delete the key/data pair
00334  * first. If it is not there, ignore.
00335  */
00336                 if (flags == DBM_REPLACE)
00337                         (void) delpair(db->pagbuf, key);
00338 #if SEEDUPS
00339                 else if (duppair(db->pagbuf, key))
00340                         return 1;
00341 #endif
00342 /*
00343  * if we do not have enough room, we have to split.
00344  */
00345                 if (!fitpair(db->pagbuf, need))
00346                         if (!makroom(db, hash, need))
00347                                 return ioerr(db), -1;
00348 /*
00349  * we have enough room or split is successful. insert the key,
00350  * and update the page file.
00351  */
00352                 (void) putpair(db->pagbuf, key, val);
00353 
00354                 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
00355                     || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
00356                         return ioerr(db), -1;
00357         /*
00358          * success
00359          */
00360                 return 0;
00361         }
00362 
00363         return ioerr(db), -1;
00364 }
00365 
00366 /*
00367  * makroom - make room by splitting the overfull page
00368  * this routine will attempt to make room for SPLTMAX times before
00369  * giving up.
00370  */
00371 static int
00372 makroom(register DBM *db, long int hash, int need)
00373 {
00374         long newp;
00375         char twin[PBLKSIZ];
00376 #if defined _WIN32 && !defined __CYGWIN__
00377         char zer[PBLKSIZ];
00378         long oldtail;
00379 #endif
00380         char *pag = db->pagbuf;
00381         char *new = twin;
00382         register int smax = SPLTMAX;
00383 
00384         do {
00385 /*
00386  * split the current page
00387  */
00388                 (void) splpage(pag, new, db->hmask + 1);
00389 /*
00390  * address of the new page
00391  */
00392                 newp = (hash & db->hmask) | (db->hmask + 1);
00393                 debug(("newp: %ld\n", newp));
00394 /*
00395  * write delay, read avoidence/cache shuffle:
00396  * select the page for incoming pair: if key is to go to the new page,
00397  * write out the previous one, and copy the new one over, thus making
00398  * it the current page. If not, simply write the new page, and we are
00399  * still looking at the page of interest. current page is not updated
00400  * here, as sdbm_store will do so, after it inserts the incoming pair.
00401  */
00402 
00403 #if defined _WIN32 && !defined __CYGWIN__
00404         /*
00405          * Fill hole with 0 if made it.
00406          * (hole is NOT read as 0)
00407          */
00408         oldtail = lseek(db->pagf, 0L, SEEK_END);
00409         memset(zer, 0, PBLKSIZ);
00410         while (OFF_PAG(newp) > oldtail) {
00411                 if (lseek(db->pagf, 0L, SEEK_END) < 0 ||
00412                     write(db->pagf, zer, PBLKSIZ) < 0) {
00413 
00414                         return 0;
00415                 }
00416                 oldtail += PBLKSIZ;
00417         }
00418 #endif
00419 
00420                 if (hash & (db->hmask + 1)) {
00421                         if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
00422                             || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
00423                                 return 0;
00424                         db->pagbno = newp;
00425                         (void) memcpy(pag, new, PBLKSIZ);
00426                 }
00427                 else if (lseek(db->pagf, OFF_PAG(newp), SEEK_SET) < 0
00428                          || write(db->pagf, new, PBLKSIZ) < 0)
00429                         return 0;
00430 
00431                 if (!setdbit(db, db->curbit))
00432                         return 0;
00433 /*
00434  * see if we have enough room now
00435  */
00436                 if (fitpair(pag, need))
00437                         return 1;
00438 /*
00439  * try again... update curbit and hmask as getpage would have
00440  * done. because of our update of the current page, we do not
00441  * need to read in anything. BUT we have to write the current
00442  * [deferred] page out, as the window of failure is too great.
00443  */
00444                 db->curbit = 2 * db->curbit +
00445                         ((hash & (db->hmask + 1)) ? 2 : 1);
00446                 db->hmask |= (db->hmask + 1);
00447 
00448                 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
00449                     || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
00450                         return 0;
00451 
00452         } while (--smax);
00453 /*
00454  * if we are here, this is real bad news. After SPLTMAX splits,
00455  * we still cannot fit the key. say goodnight.
00456  */
00457 #if BADMESS
00458         (void) (write(2, "sdbm: cannot insert after SPLTMAX attempts.\n", 44) < 0);
00459 #endif
00460         return 0;
00461 
00462 }
00463 
00464 /*
00465  * the following two routines will break if
00466  * deletions aren't taken into account. (ndbm bug)
00467  */
00468 datum
00469 sdbm_firstkey(register DBM *db)
00470 {
00471         if (db == NULL)
00472                 return errno = EINVAL, nullitem;
00473 /*
00474  * start at page 0
00475  */
00476         (void) memset(db->pagbuf, 0, PBLKSIZ);
00477         if (lseek(db->pagf, OFF_PAG(0), SEEK_SET) < 0
00478             || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
00479                 return ioerr(db), nullitem;
00480         db->pagbno = 0;
00481         db->blkptr = 0;
00482         db->keyptr = 0;
00483 
00484         return getnext(db);
00485 }
00486 
00487 datum
00488 sdbm_nextkey(register DBM *db)
00489 {
00490         if (db == NULL)
00491                 return errno = EINVAL, nullitem;
00492         return getnext(db);
00493 }
00494 
00495 /*
00496  * all important binary trie traversal
00497  */
00498 static int
00499 getpage(register DBM *db, register long int hash)
00500 {
00501         register int hbit;
00502         register long dbit;
00503         register long pagb;
00504 
00505         dbit = 0;
00506         hbit = 0;
00507         while (dbit < db->maxbno && getdbit(db, dbit))
00508                 dbit = 2 * dbit + ((hash & ((long) 1 << hbit++)) ? 2 : 1);
00509 
00510         debug(("dbit: %d...", dbit));
00511 
00512         db->curbit = dbit;
00513         db->hmask = masks[hbit];
00514 
00515         pagb = hash & db->hmask;
00516 /*
00517  * see if the block we need is already in memory.
00518  * note: this lookaside cache has about 10% hit rate.
00519  */
00520         if (pagb != db->pagbno) {
00521 /*
00522  * note: here, we assume a "hole" is read as 0s.
00523  * if not, must zero pagbuf first.
00524  */
00525                 (void) memset(db->pagbuf, 0, PBLKSIZ);
00526 
00527                 if (lseek(db->pagf, OFF_PAG(pagb), SEEK_SET) < 0
00528                     || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
00529                         return 0;
00530                 if (!chkpage(db->pagbuf)) {
00531                         return 0;
00532                 }
00533                 db->pagbno = pagb;
00534 
00535                 debug(("pag read: %d\n", pagb));
00536         }
00537         return 1;
00538 }
00539 
00540 static int
00541 getdbit(register DBM *db, register long int dbit)
00542 {
00543         register long c;
00544         register long dirb;
00545 
00546         c = dbit / BYTESIZ;
00547         dirb = c / DBLKSIZ;
00548 
00549         if (dirb != db->dirbno) {
00550                 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
00551                     || read(db->dirf, db->dirbuf, DBLKSIZ) < 0)
00552                         return 0;
00553                 db->dirbno = dirb;
00554 
00555                 debug(("dir read: %d\n", dirb));
00556         }
00557 
00558         return db->dirbuf[c % DBLKSIZ] & (1 << (dbit % BYTESIZ));
00559 }
00560 
00561 static int
00562 setdbit(register DBM *db, register long int dbit)
00563 {
00564         register long c;
00565         register long dirb;
00566 
00567         c = dbit / BYTESIZ;
00568         dirb = c / DBLKSIZ;
00569 
00570         if (dirb != db->dirbno) {
00571                 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
00572                     || read(db->dirf, db->dirbuf, DBLKSIZ) < 0)
00573                         return 0;
00574                 db->dirbno = dirb;
00575 
00576                 debug(("dir read: %d\n", dirb));
00577         }
00578 
00579         db->dirbuf[c % DBLKSIZ] |= (1 << (dbit % BYTESIZ));
00580 
00581         if (dbit >= db->maxbno)
00582                 db->maxbno += (long) DBLKSIZ * BYTESIZ;
00583 
00584         if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
00585             || write(db->dirf, db->dirbuf, DBLKSIZ) < 0)
00586                 return 0;
00587 
00588         return 1;
00589 }
00590 
00591 /*
00592  * getnext - get the next key in the page, and if done with
00593  * the page, try the next page in sequence
00594  */
00595 static datum
00596 getnext(register DBM *db)
00597 {
00598         datum key;
00599 
00600         for (;;) {
00601                 db->keyptr++;
00602                 key = getnkey(db->pagbuf, db->keyptr);
00603                 if (key.dptr != NULL)
00604                         return key;
00605 /*
00606  * we either run out, or there is nothing on this page..
00607  * try the next one... If we lost our position on the
00608  * file, we will have to seek.
00609  */
00610                 db->keyptr = 0;
00611                 if (db->pagbno != db->blkptr++)
00612                         if (lseek(db->pagf, OFF_PAG(db->blkptr), SEEK_SET) < 0)
00613                                 break;
00614                 db->pagbno = db->blkptr;
00615                 if (read(db->pagf, db->pagbuf, PBLKSIZ) <= 0)
00616                         break;
00617                 if (!chkpage(db->pagbuf)) {
00618                         break;
00619                 }
00620         }
00621 
00622         return ioerr(db), nullitem;
00623 }
00624 
00625 /* pair.c */
00626 /*
00627  * sdbm - ndbm work-alike hashed database library
00628  * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
00629  * author: oz@nexus.yorku.ca
00630  * status: public domain.
00631  *
00632  * page-level routines
00633  */
00634 
00635 #ifndef BSD42
00636 /*#include <memory.h>*/
00637 #endif
00638 
00639 #define exhash(item)    sdbm_hash((item).dptr, (item).dsize)
00640 
00641 /*
00642  * forward
00643  */
00644 static int seepair proto((char *, int, char *, int));
00645 
00646 /*
00647  * page format:
00648  *      +------------------------------+
00649  * ino  | n | keyoff | datoff | keyoff |
00650  *      +------------+--------+--------+
00651  *      | datoff | - - - ---->         |
00652  *      +--------+---------------------+
00653  *      |        F R E E A R E A       |
00654  *      +--------------+---------------+
00655  *      |  <---- - - - | data          |
00656  *      +--------+-----+----+----------+
00657  *      |  key   | data     | key      |
00658  *      +--------+----------+----------+
00659  *
00660  * calculating the offsets for free area:  if the number
00661  * of entries (ino[0]) is zero, the offset to the END of
00662  * the free area is the block size. Otherwise, it is the
00663  * nth (ino[ino[0]]) entry's offset.
00664  */
00665 
00666 static int
00667 fitpair(char *pag, int need)
00668 {
00669         register int n;
00670         register int off;
00671         register int free;
00672         register short *ino = (short *) pag;
00673 
00674         off = ((n = GET_SHORT(ino,0)) > 0) ? GET_SHORT(ino,n) : PBLKSIZ;
00675         free = off - (n + 1) * (int)sizeof(short);
00676         need += 2 * (int)sizeof(short);
00677 
00678         debug(("free %d need %d\n", free, need));
00679 
00680         return need <= free;
00681 }
00682 
00683 static void
00684 putpair(char *pag, datum key, datum val)
00685 {
00686         register int n;
00687         register int off;
00688         register short *ino = (short *) pag;
00689 
00690         off = ((n = GET_SHORT(ino,0)) > 0) ? GET_SHORT(ino,n) : PBLKSIZ;
00691 /*
00692  * enter the key first
00693  */
00694         off -= key.dsize;
00695         if (key.dsize)
00696                 (void) memcpy(pag + off, key.dptr, key.dsize);
00697         PUT_SHORT(ino,n + 1,off);
00698 /*
00699  * now the data
00700  */
00701         off -= val.dsize;
00702         if (val.dsize)
00703                 (void) memcpy(pag + off, val.dptr, val.dsize);
00704         PUT_SHORT(ino,n + 2,off);
00705 /*
00706  * adjust item count
00707  */
00708         PUT_SHORT(ino,0,GET_SHORT(ino,0) + 2);
00709 }
00710 
00711 static datum
00712 getpair(char *pag, datum key)
00713 {
00714         register int i;
00715         register int n;
00716         datum val;
00717         register short *ino = (short *) pag;
00718 
00719         if ((n = GET_SHORT(ino,0)) == 0)
00720                 return nullitem;
00721 
00722         if ((i = seepair(pag, n, key.dptr, key.dsize)) == 0)
00723                 return nullitem;
00724 
00725         val.dptr = pag + GET_SHORT(ino,i + 1);
00726         val.dsize = GET_SHORT(ino,i) - GET_SHORT(ino,i + 1);
00727         return val;
00728 }
00729 
00730 #if SEEDUPS
00731 static int
00732 duppair(char *pag, datum key)
00733 {
00734         register short *ino = (short *) pag;
00735         return GET_SHORT(ino,0) > 0 &&
00736                    seepair(pag, GET_SHORT(ino,0), key.dptr, key.dsize) > 0;
00737 }
00738 #endif
00739 
00740 static datum
00741 getnkey(char *pag, int num)
00742 {
00743         datum key;
00744         register int off;
00745         register short *ino = (short *) pag;
00746 
00747         num = num * 2 - 1;
00748         if (GET_SHORT(ino,0) == 0 || num > GET_SHORT(ino,0))
00749                 return nullitem;
00750 
00751         off = (num > 1) ? GET_SHORT(ino,num - 1) : PBLKSIZ;
00752 
00753         key.dptr = pag + GET_SHORT(ino,num);
00754         key.dsize = off - GET_SHORT(ino,num);
00755 
00756         return key;
00757 }
00758 
00759 static int
00760 delpair(char *pag, datum key)
00761 {
00762         register int n;
00763         register int i;
00764         register short *ino = (short *) pag;
00765 
00766         if ((n = GET_SHORT(ino,0)) == 0)
00767                 return 0;
00768 
00769         if ((i = seepair(pag, n, key.dptr, key.dsize)) == 0)
00770                 return 0;
00771 /*
00772  * found the key. if it is the last entry
00773  * [i.e. i == n - 1] we just adjust the entry count.
00774  * hard case: move all data down onto the deleted pair,
00775  * shift offsets onto deleted offsets, and adjust them.
00776  * [note: 0 < i < n]
00777  */
00778         if (i < n - 1) {
00779                 register int m;
00780                 register char *dst = pag + (i == 1 ? PBLKSIZ : GET_SHORT(ino,i - 1));
00781                 register char *src = pag + GET_SHORT(ino,i + 1);
00782                 register ptrdiff_t   zoo = dst - src;
00783 
00784                 debug(("free-up %"PRIdPTRDIFF" ", zoo));
00785 /*
00786  * shift data/keys down
00787  */
00788                 m = GET_SHORT(ino,i + 1) - GET_SHORT(ino,n);
00789 #ifdef DUFF
00790 #define MOVB    *--dst = *--src
00791 
00792                 if (m > 0) {
00793                         register int loop = (m + 8 - 1) >> 3;
00794 
00795                         switch (m & (8 - 1)) {
00796                         case 0: do {
00797                                 MOVB;   case 7: MOVB;
00798                         case 6: MOVB;   case 5: MOVB;
00799                         case 4: MOVB;   case 3: MOVB;
00800                         case 2: MOVB;   case 1: MOVB;
00801                                 } while (--loop);
00802                         }
00803                 }
00804 #else
00805 #ifdef MEMMOVE
00806                 memmove(dst, src, m);
00807 #else
00808                 while (m--)
00809                         *--dst = *--src;
00810 #endif
00811 #endif
00812 /*
00813  * adjust offset index up
00814  */
00815                 while (i < n - 1) {
00816                         PUT_SHORT(ino,i, GET_SHORT(ino,i + 2) + zoo);
00817                         i++;
00818                 }
00819         }
00820         PUT_SHORT(ino, 0, GET_SHORT(ino, 0) - 2);
00821         return 1;
00822 }
00823 
00824 /*
00825  * search for the key in the page.
00826  * return offset index in the range 0 < i < n.
00827  * return 0 if not found.
00828  */
00829 static int
00830 seepair(char *pag, register int n, register char *key, register int siz)
00831 {
00832         register int i;
00833         register int off = PBLKSIZ;
00834         register short *ino = (short *) pag;
00835 
00836         for (i = 1; i < n; i += 2) {
00837                 if (siz == off - GET_SHORT(ino,i) &&
00838                     memcmp(key, pag + GET_SHORT(ino,i), siz) == 0)
00839                         return i;
00840                 off = GET_SHORT(ino,i + 1);
00841         }
00842         return 0;
00843 }
00844 
00845 static void
00846 splpage(char *pag, char *new, long int sbit)
00847 {
00848         datum key;
00849         datum val;
00850 
00851         register int n;
00852         register int off = PBLKSIZ;
00853         char cur[PBLKSIZ];
00854         register short *ino = (short *) cur;
00855 
00856         (void) memcpy(cur, pag, PBLKSIZ);
00857         (void) memset(pag, 0, PBLKSIZ);
00858         (void) memset(new, 0, PBLKSIZ);
00859 
00860         n = GET_SHORT(ino,0);
00861         for (ino++; n > 0; ino += 2) {
00862                 key.dptr = cur + GET_SHORT(ino,0);
00863                 key.dsize = off - GET_SHORT(ino,0);
00864                 val.dptr = cur + GET_SHORT(ino,1);
00865                 val.dsize = GET_SHORT(ino,0) - GET_SHORT(ino,1);
00866 /*
00867  * select the page pointer (by looking at sbit) and insert
00868  */
00869                 (void) putpair((exhash(key) & sbit) ? new : pag, key, val);
00870 
00871                 off = GET_SHORT(ino,1);
00872                 n -= 2;
00873         }
00874 
00875         debug(("%d split %d/%d\n", ((short *) cur)[0] / 2,
00876                ((short *) new)[0] / 2,
00877                ((short *) pag)[0] / 2));
00878 }
00879 
00880 /*
00881  * check page sanity:
00882  * number of entries should be something
00883  * reasonable, and all offsets in the index should be in order.
00884  * this could be made more rigorous.
00885  */
00886 static int
00887 chkpage(char *pag)
00888 {
00889         register int n;
00890         register int off;
00891         register short *ino = (short *) pag;
00892 
00893         if ((n = GET_SHORT(ino,0)) < 0 || n > PBLKSIZ / (int)sizeof(short))
00894                 return 0;
00895 
00896         if (n > 0) {
00897                 off = PBLKSIZ;
00898                 for (ino++; n > 0; ino += 2) {
00899                         if (GET_SHORT(ino,0) > off || GET_SHORT(ino,1) > off ||
00900                             GET_SHORT(ino,1) > GET_SHORT(ino,0))
00901                                 return 0;
00902                         off = GET_SHORT(ino,1);
00903                         n -= 2;
00904                 }
00905         }
00906         return 1;
00907 }
00908 
00909 /* hash.c */
00910 /*
00911  * sdbm - ndbm work-alike hashed database library
00912  * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
00913  * author: oz@nexus.yorku.ca
00914  * status: public domain. keep it that way.
00915  *
00916  * hashing routine
00917  */
00918 
00919 /*
00920  * polynomial conversion ignoring overflows
00921  * [this seems to work remarkably well, in fact better
00922  * then the ndbm hash function. Replace at your own risk]
00923  * use: 65599   nice.
00924  *      65587   even better.
00925  */
00926 long
00927 sdbm_hash(register char *str, register int len)
00928 {
00929         register unsigned long n = 0;
00930 
00931 #ifdef DUFF
00932 
00933 #define HASHC   n = *str++ + 65599 * n
00934 
00935         if (len > 0) {
00936                 register int loop = (len + 8 - 1) >> 3;
00937 
00938                 switch(len & (8 - 1)) {
00939                 case 0: do {
00940                         HASHC;  case 7: HASHC;
00941                 case 6: HASHC;  case 5: HASHC;
00942                 case 4: HASHC;  case 3: HASHC;
00943                 case 2: HASHC;  case 1: HASHC;
00944                         } while (--loop);
00945                 }
00946 
00947         }
00948 #else
00949         while (len--)
00950                 n = ((*str++) & 255) + 65587L * n;
00951 #endif
00952         return n;
00953 }
00954