Factor records system management into recindex.c and records.c.
author Adam Dickmeiss <adam@indexdata.dk>
Fri, 23 Nov 2007 13:52:52 +0000 (13:52 +0000)
committer Adam Dickmeiss <adam@indexdata.dk>
Fri, 23 Nov 2007 13:52:52 +0000 (13:52 +0000)
index/Makefile.am
index/recindex.c
index/recindex.h
index/recindxp.h
index/records.c [new file with mode: 0644]

index a4c84b6..2391b2e 100644 (file)
@@ -1,4 +1,4 @@
-## $Id: Makefile.am,v 1.64 2007-11-01 14:56:07 adam Exp $
+## $Id: Makefile.am,v 1.65 2007-11-23 13:52:52 adam Exp $
 
 aux_libs = \
  ../rset/libidzebra-rset.la \
@@ -93,7 +93,8 @@ libidzebra_2_0_la_SOURCES = \
   limit.c \
   orddict.c orddict.h \
   rank.h rank1.c ranksimilarity.c rankstatic.c \
-  recindex.c  recindex.h recindxp.h reckeys.c reckeys.h recstat.c retrieve.c \
+  records.c recindex.c recindex.h recindxp.h reckeys.c reckeys.h \
+  recstat.c retrieve.c \
   rpnscan.c rpnsearch.c rpnfacet.c sortidx.c symtab.c stream.c \
   update_path.c update_file.c trunc.c untrans.c isam_methods.c \
   zaptterm.c zebraapi.c zinfo.c zinfo.h zsets.c key_block.c key_block.h \
index 8de2ca8..a8f35df 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: recindex.c,v 1.57 2007-11-23 13:11:08 adam Exp $
+/* $Id: recindex.c,v 1.58 2007-11-23 13:52:52 adam Exp $
    Copyright (C) 1995-2007
    Index Data ApS
 
@@ -20,70 +20,57 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
 */
 
-#define RIDX_CHUNK 128
-
-/*
- *  Format of first block
- *      next       (8 bytes)
- *      ref_count  (2 bytes)
- *      block      (500 bytes)
- *
- *  Format of subsequent blocks 
- *      next  (8 bytes)
- *      block (502 bytes)
- *
- *  Format of each record
- *      sysno
- *      (length, data) - pairs
- *      length = 0 if same as previous
- */
 #include <stdio.h>
 #include <stdlib.h>
 #include <assert.h>
 #include <string.h>
 
 #include <yaz/yaz-util.h>
-#include "recindxp.h"
+#include "recindex.h"
 
-#if HAVE_BZLIB_H
-#include <bzlib.h>
-#endif
+#define RIDX_CHUNK 128
 
-/* Modify argument to if below: 1=normal, 0=sysno testing */
-#if 1
-/* If this is used sysno are not converted (no testing) */
-#define FAKE_OFFSET 0
-#define USUAL_RANGE 6000000000LL
 
-#else
-/* Use a fake > 2^32 offset so we can test for proper 64-bit handling */
-#define FAKE_OFFSET 6000000000LL
-#define USUAL_RANGE 2000000000LL
-#endif
+struct recindex {
+    char *index_fname;
+    BFile index_BFile;
+};
 
-static zint rec_sysno_to_ext(zint sysno)
+recindex_t recindex_open(BFiles bfs, int rw)
 {
-    assert(sysno >= 0 && sysno <= USUAL_RANGE);
-    return sysno + FAKE_OFFSET;
+    recindex_t p = xmalloc(sizeof(*p));
+    p->index_fname = "reci";
+    p->index_BFile = bf_open(bfs, p->index_fname, RIDX_CHUNK, rw);
+    if (p->index_BFile == NULL)
+    {
+        yaz_log(YLOG_FATAL|YLOG_ERRNO, "open %s", p->index_fname);
+       xfree(p);
+       return 0;
+    }
+    return p;
 }
 
-zint rec_sysno_to_int(zint sysno)
+void recindex_close(recindex_t p)
 {
-    assert(sysno >= FAKE_OFFSET && sysno <= FAKE_OFFSET + USUAL_RANGE);
-    return sysno - FAKE_OFFSET;
+    if (p)
+    {
+        if (p->index_BFile)
+            bf_close(p->index_BFile);
+        xfree(p);
+    }
 }
 
-static int rec_read_head(recindex_t p, void *buf)
+int recindex_read_head(recindex_t p, void *buf)
 {
     return bf_read(p->index_BFile, 0, 0, 0, buf);
 }
 
-static const char *recindex_get_fname(recindex_t p)
+const char *recindex_get_fname(recindex_t p)
 {
     return p->index_fname;
 }
 
-static ZEBRA_RES rec_write_head(recindex_t p, const void *buf, size_t len)
+ZEBRA_RES recindex_write_head(recindex_t p, const void *buf, size_t len)
 {
     int r;
 
@@ -99,20 +86,8 @@ static ZEBRA_RES rec_write_head(recindex_t p, const void *buf, size_t len)
     return ZEBRA_OK;
 }
 
-static void rec_tmp_expand(Records p, int size)
-{
-    if (p->tmp_size < size + 2048 ||
-        p->tmp_size < p->head.block_size[REC_BLOCK_TYPES-1]*2)
-    {
-        xfree(p->tmp_buf);
-        p->tmp_size = size + (int)
-                       (p->head.block_size[REC_BLOCK_TYPES-1])*2 + 2048;
-        p->tmp_buf = (char *) xmalloc(p->tmp_size);
-    }
-}
-
-static int read_indx(recindex_t p, zint sysno, void *buf, int itemsize, 
-                     int ignoreError)
+int recindex_read_indx(recindex_t p, zint sysno, void *buf, int itemsize, 
+                       int ignoreError)
 {
     int r;
     zint pos = (sysno-1)*itemsize;
@@ -134,7 +109,7 @@ static int read_indx(recindex_t p, zint sysno, void *buf, int itemsize,
     return r;
 }
 
-static void write_indx(recindex_t p, zint sysno, void *buf, int itemsize)
+void recindex_write_indx(recindex_t p, zint sysno, void *buf, int itemsize)
 {
     zint pos = (sysno-1)*itemsize;
     int off = CAST_ZINT_TO_INT(pos%RIDX_CHUNK);
@@ -149,871 +124,6 @@ static void write_indx(recindex_t p, zint sysno, void *buf, int itemsize)
                (char*) buf + sz1);
 }
 
-static ZEBRA_RES rec_release_blocks(Records p, zint sysno)
-{
-    struct record_index_entry entry;
-    zint freeblock;
-    char block_and_ref[sizeof(zint) + sizeof(short)];
-    int dst_type;
-    int first = 1;
-
-    if (read_indx(p->recindex, sysno, &entry, sizeof(entry), 1) != 1)
-        return ZEBRA_FAIL;
-
-    freeblock = entry.next;
-    assert(freeblock > 0);
-    dst_type = CAST_ZINT_TO_INT(freeblock & 7);
-    assert(dst_type < REC_BLOCK_TYPES);
-    freeblock = freeblock / 8;
-    while (freeblock)
-    {
-        if (bf_read(p->data_BFile[dst_type], freeblock, 0,
-                    first ? sizeof(block_and_ref) : sizeof(zint),
-                    block_and_ref) != 1)
-        {
-            yaz_log(YLOG_FATAL|YLOG_ERRNO, "read in rec_del_single");
-           return ZEBRA_FAIL;
-        }
-       if (first)
-       {
-           short ref;
-           memcpy(&ref, block_and_ref + sizeof(freeblock), sizeof(ref));
-           --ref;
-           memcpy(block_and_ref + sizeof(freeblock), &ref, sizeof(ref));
-           if (ref)
-           {
-               if (bf_write(p->data_BFile[dst_type], freeblock, 0,
-                             sizeof(block_and_ref), block_and_ref))
-               {
-                   yaz_log(YLOG_FATAL|YLOG_ERRNO, "write in rec_del_single");
-                   return ZEBRA_FAIL;
-               }
-               return ZEBRA_OK;
-           }
-           first = 0;
-       }
-       
-        if (bf_write(p->data_BFile[dst_type], freeblock, 0, sizeof(freeblock),
-                      &p->head.block_free[dst_type]))
-        {
-            yaz_log(YLOG_FATAL|YLOG_ERRNO, "write in rec_del_single");
-           return ZEBRA_FAIL;
-        }
-        p->head.block_free[dst_type] = freeblock;
-        memcpy(&freeblock, block_and_ref, sizeof(freeblock));
-
-        p->head.block_used[dst_type]--;
-    }
-    p->head.total_bytes -= entry.size;
-    return ZEBRA_OK;
-}
-
-static ZEBRA_RES rec_delete_single(Records p, Record rec)
-{
-    struct record_index_entry entry;
-
-    /* all data in entry must be reset, since it's written verbatim */
-    memset(&entry, '\0', sizeof(entry));
-    if (rec_release_blocks(p, rec_sysno_to_int(rec->sysno)) != ZEBRA_OK)
-       return ZEBRA_FAIL;
-
-    entry.next = p->head.index_free;
-    entry.size = 0;
-    p->head.index_free = rec_sysno_to_int(rec->sysno);
-    write_indx(p->recindex, rec_sysno_to_int(rec->sysno), &entry, sizeof(entry));
-    return ZEBRA_OK;
-}
-
-static ZEBRA_RES rec_write_tmp_buf(Records p, int size, zint *sysnos)
-{
-    struct record_index_entry entry;
-    int no_written = 0;
-    char *cptr = p->tmp_buf;
-    zint block_prev = -1, block_free;
-    int dst_type = 0;
-    int i;
-
-    /* all data in entry must be reset, since it's written verbatim */
-    memset(&entry, '\0', sizeof(entry));
-
-    for (i = 1; i<REC_BLOCK_TYPES; i++)
-        if (size >= p->head.block_move[i])
-            dst_type = i;
-    while (no_written < size)
-    {
-        block_free = p->head.block_free[dst_type];
-        if (block_free)
-        {
-            if (bf_read(p->data_BFile[dst_type],
-                         block_free, 0, sizeof(*p->head.block_free),
-                         &p->head.block_free[dst_type]) != 1)
-            {
-                yaz_log(YLOG_FATAL|YLOG_ERRNO, "read in %s at free block "
-                        ZINT_FORMAT,
-                        p->data_fname[dst_type], block_free);
-               return ZEBRA_FAIL;
-            }
-        }
-        else
-            block_free = p->head.block_last[dst_type]++;
-        if (block_prev == -1)
-        {
-            entry.next = block_free*8 + dst_type;
-            entry.size = size;
-            p->head.total_bytes += size;
-           while (*sysnos > 0)
-           {
-               write_indx(p->recindex, *sysnos, &entry, sizeof(entry));
-               sysnos++;
-           }
-        }
-        else
-        {
-            memcpy(cptr, &block_free, sizeof(block_free));
-            bf_write(p->data_BFile[dst_type], block_prev, 0, 0, cptr);
-            cptr = p->tmp_buf + no_written;
-        }
-        block_prev = block_free;
-        no_written += CAST_ZINT_TO_INT(p->head.block_size[dst_type]) 
-            - sizeof(zint);
-        p->head.block_used[dst_type]++;
-    }
-    assert(block_prev != -1);
-    block_free = 0;
-    memcpy(cptr, &block_free, sizeof(block_free));
-    bf_write(p->data_BFile[dst_type], block_prev, 0,
-              sizeof(block_free) + (p->tmp_buf+size) - cptr, cptr);
-    return ZEBRA_OK;
-}
-
-recindex_t recindex_open(BFiles bfs, int rw)
-{
-    recindex_t p = xmalloc(sizeof(*p));
-    p->index_fname = "reci";
-    p->index_BFile = bf_open(bfs, p->index_fname, RIDX_CHUNK, rw);
-    if (p->index_BFile == NULL)
-    {
-        yaz_log(YLOG_FATAL|YLOG_ERRNO, "open %s", p->index_fname);
-       xfree(p);
-       return 0;
-    }
-    return p;
-}
-
-void recindex_close(recindex_t p)
-{
-    if (p)
-    {
-        if (p->index_BFile)
-            bf_close(p->index_BFile);
-        xfree(p);
-    }
-}
-
-
-Records rec_open(BFiles bfs, int rw, int compression_method)
-{
-    Records p;
-    int i, r;
-    int version;
-    ZEBRA_RES ret = ZEBRA_OK;
-
-    p = (Records) xmalloc(sizeof(*p));
-    memset(&p->head, '\0', sizeof(p->head));
-    p->compression_method = compression_method;
-    p->rw = rw;
-    p->tmp_size = 1024;
-    p->recindex = recindex_open(bfs, rw);
-    p->tmp_buf = (char *) xmalloc(p->tmp_size);
-    r = rec_read_head(p->recindex, p->tmp_buf);
-    switch (r)
-    {
-    case 0:
-        memcpy(p->head.magic, REC_HEAD_MAGIC, sizeof(p->head.magic));
-       sprintf(p->head.version, "%3d", REC_VERSION);
-        p->head.index_free = 0;
-        p->head.index_last = 1;
-        p->head.no_records = 0;
-        p->head.total_bytes = 0;
-        for (i = 0; i<REC_BLOCK_TYPES; i++)
-        {
-            p->head.block_free[i] = 0;
-            p->head.block_last[i] = 1;
-            p->head.block_used[i] = 0;
-        }
-        p->head.block_size[0] = 128;
-        p->head.block_move[0] = 0;
-        for (i = 1; i<REC_BLOCK_TYPES; i++)
-        {
-            p->head.block_size[i] = p->head.block_size[i-1] * 4;
-            p->head.block_move[i] = p->head.block_size[i] * 24;
-        }
-        if (rw)
-       {
-            if (rec_write_head(p->recindex, &p->head, sizeof(p->head)) != ZEBRA_OK)
-               ret = ZEBRA_FAIL;
-       }
-        break;
-    case 1:
-        memcpy(&p->head, p->tmp_buf, sizeof(p->head));
-        if (memcmp(p->head.magic, REC_HEAD_MAGIC, sizeof(p->head.magic)))
-        {
-            yaz_log(YLOG_FATAL, "file %s has bad format",
-                    recindex_get_fname(p->recindex));
-           ret = ZEBRA_FAIL;
-        }
-       version = atoi(p->head.version);
-       if (version != REC_VERSION)
-       {
-           yaz_log(YLOG_FATAL, "file %s is version %d, but version"
-                 " %d is required",
-                    recindex_get_fname(p->recindex), version, REC_VERSION);
-           ret = ZEBRA_FAIL;
-       }
-        break;
-    }
-    for (i = 0; i<REC_BLOCK_TYPES; i++)
-    {
-        char str[80];
-        sprintf(str, "recd%c", i + 'A');
-        p->data_fname[i] = (char *) xmalloc(strlen(str)+1);
-        strcpy(p->data_fname[i], str);
-        p->data_BFile[i] = NULL;
-    }
-    for (i = 0; i<REC_BLOCK_TYPES; i++)
-    {
-        if (!(p->data_BFile[i] =
-              bf_open(bfs, p->data_fname[i],
-                      CAST_ZINT_TO_INT(p->head.block_size[i]), rw)))
-        {
-            yaz_log(YLOG_FATAL|YLOG_ERRNO, "bf_open %s", p->data_fname[i]);
-           ret = ZEBRA_FAIL;
-            break;
-        }
-    }
-    p->cache_max = 400;
-    p->cache_cur = 0;
-    p->record_cache = (struct record_cache_entry *)
-       xmalloc(sizeof(*p->record_cache)*p->cache_max);
-    zebra_mutex_init(&p->mutex);
-    if (ret == ZEBRA_FAIL)
-       rec_close(&p);
-    return p;
-}
-
-static void rec_encode_unsigned(unsigned n, unsigned char *buf, int *len)
-{
-    (*len) = 0;
-    while (n > 127)
-    {
-       buf[*len] = 128 + (n & 127);
-       n = n >> 7;
-       (*len)++;
-    }
-    buf[*len] = n;
-    (*len)++;
-}
-
-static void rec_decode_unsigned(unsigned *np, unsigned char *buf, int *len)
-{
-    unsigned n = 0;
-    unsigned w = 1;
-    (*len) = 0;
-
-    while (buf[*len] > 127)
-    {
-       n += w*(buf[*len] & 127);
-       w = w << 7;
-       (*len)++;
-    }
-    n += w * buf[*len];
-    (*len)++;
-    *np = n;
-}
-
-static void rec_encode_zint(zint n, unsigned char *buf, int *len)
-{
-    (*len) = 0;
-    while (n > 127)
-    {
-       buf[*len] = (unsigned) (128 + (n & 127));
-       n = n >> 7;
-       (*len)++;
-    }
-    buf[*len] = (unsigned) n;
-    (*len)++;
-}
-
-static void rec_decode_zint(zint *np, unsigned char *buf, int *len)
-{
-    zint  n = 0;
-    zint w = 1;
-    (*len) = 0;
-
-    while (buf[*len] > 127)
-    {
-       n += w*(buf[*len] & 127);
-       w = w << 7;
-       (*len)++;
-    }
-    n += w * buf[*len];
-    (*len)++;
-    *np = n;
-}
-
-static void rec_cache_flush_block1(Records p, Record rec, Record last_rec,
-                                  char **out_buf, int *out_size,
-                                  int *out_offset)
-{
-    int i;
-    int len;
-
-    for (i = 0; i<REC_NO_INFO; i++)
-    {
-       if (*out_offset + CAST_ZINT_TO_INT(rec->size[i]) + 20 > *out_size)
-       {
-           int new_size = *out_offset + rec->size[i] + 65536;
-           char *np = (char *) xmalloc(new_size);
-           if (*out_offset)
-               memcpy(np, *out_buf, *out_offset);
-           xfree(*out_buf);
-           *out_size = new_size;
-           *out_buf = np;
-       }
-       if (i == 0)
-       {
-           rec_encode_zint(rec_sysno_to_int(rec->sysno), 
-                           (unsigned char *) *out_buf + *out_offset, &len);
-           (*out_offset) += len;
-       }
-       if (rec->size[i] == 0)
-       {
-           rec_encode_unsigned(1, (unsigned char *) *out_buf + *out_offset,
-                               &len);
-           (*out_offset) += len;
-       }
-       else if (last_rec && rec->size[i] == last_rec->size[i] &&
-                !memcmp(rec->info[i], last_rec->info[i], rec->size[i]))
-       {
-           rec_encode_unsigned(0, (unsigned char *) *out_buf + *out_offset,
-                               &len);
-           (*out_offset) += len;
-       }
-       else
-       {
-           rec_encode_unsigned(rec->size[i]+1,
-                               (unsigned char *) *out_buf + *out_offset,
-                               &len);
-           (*out_offset) += len;
-           memcpy(*out_buf + *out_offset, rec->info[i], rec->size[i]);
-           (*out_offset) += rec->size[i];
-       }
-    }
-}
-
-static ZEBRA_RES rec_write_multiple(Records p, int saveCount)
-{
-    int i;
-    short ref_count = 0;
-    char compression_method;
-    Record last_rec = 0;
-    int out_size = 1000;
-    int out_offset = 0;
-    char *out_buf = (char *) xmalloc(out_size);
-    zint *sysnos = (zint *) xmalloc(sizeof(*sysnos) * (p->cache_cur + 1));
-    zint *sysnop = sysnos;
-    ZEBRA_RES ret = ZEBRA_OK;
-
-    for (i = 0; i<p->cache_cur - saveCount; i++)
-    {
-        struct record_cache_entry *e = p->record_cache + i;
-        switch (e->flag)
-        {
-        case recordFlagNew:
-            rec_cache_flush_block1(p, e->rec, last_rec, &out_buf,
-                                   &out_size, &out_offset);
-           *sysnop++ = rec_sysno_to_int(e->rec->sysno);
-           ref_count++;
-           e->flag = recordFlagNop;
-           last_rec = e->rec;
-            break;
-        case recordFlagWrite:
-           if (rec_release_blocks(p, rec_sysno_to_int(e->rec->sysno))
-               != ZEBRA_OK)
-               ret = ZEBRA_FAIL;
-
-            rec_cache_flush_block1(p, e->rec, last_rec, &out_buf,
-                                   &out_size, &out_offset);
-           *sysnop++ = rec_sysno_to_int(e->rec->sysno);
-           ref_count++;
-           e->flag = recordFlagNop;
-           last_rec = e->rec;
-            break;
-        case recordFlagDelete:
-            if (rec_delete_single(p, e->rec) != ZEBRA_OK)
-               ret = ZEBRA_FAIL;
-
-           e->flag = recordFlagNop;
-            break;
-       default:
-           break;
-        }
-    }
-
-    *sysnop = -1;
-    if (ref_count)
-    {
-       unsigned int csize = 0;  /* indicate compression "not performed yet" */
-       compression_method = p->compression_method;
-       switch (compression_method)
-       {
-       case REC_COMPRESS_BZIP2:
-#if HAVE_BZLIB_H       
-           csize = out_offset + (out_offset >> 6) + 620;
-           rec_tmp_expand(p, csize);
-#ifdef BZ_CONFIG_ERROR
-           i = BZ2_bzBuffToBuffCompress 
-#else
-           i = bzBuffToBuffCompress 
-#endif
-                                   (p->tmp_buf+sizeof(zint)+sizeof(short)+
-                                     sizeof(char),
-                                     &csize, out_buf, out_offset, 1, 0, 30);
-           if (i != BZ_OK)
-           {
-               yaz_log(YLOG_WARN, "bzBuffToBuffCompress error code=%d", i);
-               csize = 0;
-           }
-           yaz_log(YLOG_LOG, "compress %4d %5d %5d", ref_count, out_offset,
-                 csize);
-#endif
-           break;
-       case REC_COMPRESS_NONE:
-           break;
-       }
-       if (!csize)  
-       {
-           /* either no compression or compression not supported ... */
-           csize = out_offset;
-           rec_tmp_expand(p, csize);
-           memcpy(p->tmp_buf + sizeof(zint) + sizeof(short) + sizeof(char),
-                   out_buf, out_offset);
-           csize = out_offset;
-           compression_method = REC_COMPRESS_NONE;
-       }
-       memcpy(p->tmp_buf + sizeof(zint), &ref_count, sizeof(ref_count));
-       memcpy(p->tmp_buf + sizeof(zint)+sizeof(short),
-               &compression_method, sizeof(compression_method));
-               
-       /* -------- compression */
-       if (rec_write_tmp_buf(p, csize + sizeof(short) + sizeof(char), sysnos)
-           != ZEBRA_OK)
-           ret = ZEBRA_FAIL;
-    }
-    xfree(out_buf);
-    xfree(sysnos);
-    return ret;
-}
-
-static ZEBRA_RES rec_cache_flush(Records p, int saveCount)
-{
-    int i, j;
-    ZEBRA_RES ret;
-
-    if (saveCount >= p->cache_cur)
-        saveCount = 0;
-
-    ret = rec_write_multiple(p, saveCount);
-
-    for (i = 0; i<p->cache_cur - saveCount; i++)
-    {
-        struct record_cache_entry *e = p->record_cache + i;
-        rec_free(&e->rec);
-    } 
-    /* i still being used ... */
-    for (j = 0; j<saveCount; j++, i++)
-        memcpy(p->record_cache+j, p->record_cache+i,
-                sizeof(*p->record_cache));
-    p->cache_cur = saveCount;
-    return ret;
-}
-
-static Record *rec_cache_lookup(Records p, zint sysno,
-                               enum recordCacheFlag flag)
-{
-    int i;
-    for (i = 0; i<p->cache_cur; i++)
-    {
-        struct record_cache_entry *e = p->record_cache + i;
-        if (e->rec->sysno == sysno)
-        {
-            if (flag != recordFlagNop && e->flag == recordFlagNop)
-                e->flag = flag;
-            return &e->rec;
-        }
-    }
-    return NULL;
-}
-
-static ZEBRA_RES rec_cache_insert(Records p, Record rec, enum recordCacheFlag flag)
-{
-    struct record_cache_entry *e;
-    ZEBRA_RES ret = ZEBRA_OK;
-
-    if (p->cache_cur == p->cache_max)
-        ret = rec_cache_flush(p, 1);
-    else if (p->cache_cur > 0)
-    {
-        int i, j;
-        int used = 0;
-        for (i = 0; i<p->cache_cur; i++)
-        {
-            Record r = (p->record_cache + i)->rec;
-            for (j = 0; j<REC_NO_INFO; j++)
-                used += r->size[j];
-        }
-        if (used > 90000)
-            ret = rec_cache_flush(p, 1);
-    }
-    assert(p->cache_cur < p->cache_max);
-
-    e = p->record_cache + (p->cache_cur)++;
-    e->flag = flag;
-    e->rec = rec_cp(rec);
-    return ret;
-}
-
-ZEBRA_RES rec_close(Records *pp)
-{
-    Records p = *pp;
-    int i;
-    ZEBRA_RES ret = ZEBRA_OK;
-
-    if (!p)
-       return ret;
-
-    zebra_mutex_destroy(&p->mutex);
-    if (rec_cache_flush(p, 0) != ZEBRA_OK)
-       ret = ZEBRA_FAIL;
-
-    xfree(p->record_cache);
-
-    if (p->rw)
-    {
-        if (rec_write_head(p->recindex, &p->head, sizeof(p->head)) != ZEBRA_OK)
-           ret = ZEBRA_FAIL;
-    }
-
-    recindex_close(p->recindex);
-
-    for (i = 0; i<REC_BLOCK_TYPES; i++)
-    {
-        if (p->data_BFile[i])
-            bf_close(p->data_BFile[i]);
-        xfree(p->data_fname[i]);
-    }
-    xfree(p->tmp_buf);
-    xfree(p);
-    *pp = NULL;
-    return ret;
-}
-
-static Record rec_get_int(Records p, zint sysno)
-{
-    int i, in_size, r;
-    Record rec, *recp;
-    struct record_index_entry entry;
-    zint freeblock;
-    int dst_type;
-    char *nptr, *cptr;
-    char *in_buf = 0;
-    char *bz_buf = 0;
-#if HAVE_BZLIB_H
-    unsigned int bz_size;
-#endif
-    char compression_method;
-
-    assert(sysno > 0);
-    assert(p);
-
-    if ((recp = rec_cache_lookup(p, sysno, recordFlagNop)))
-        return rec_cp(*recp);
-
-    if (read_indx(p->recindex, rec_sysno_to_int(sysno), &entry, sizeof(entry), 1) < 1)
-        return NULL;       /* record is not there! */
-
-    if (!entry.size)
-        return NULL;       /* record is deleted */
-
-    dst_type = (int) (entry.next & 7);
-    assert(dst_type < REC_BLOCK_TYPES);
-    freeblock = entry.next / 8;
-
-    assert(freeblock > 0);
-    
-    rec_tmp_expand(p, entry.size);
-
-    cptr = p->tmp_buf;
-    r = bf_read(p->data_BFile[dst_type], freeblock, 0, 0, cptr);
-    if (r < 0)
-       return 0;
-    memcpy(&freeblock, cptr, sizeof(freeblock));
-
-    while (freeblock)
-    {
-        zint tmp;
-
-        cptr += p->head.block_size[dst_type] - sizeof(freeblock);
-        
-        memcpy(&tmp, cptr, sizeof(tmp));
-        r = bf_read(p->data_BFile[dst_type], freeblock, 0, 0, cptr);
-       if (r < 0)
-           return 0;
-        memcpy(&freeblock, cptr, sizeof(freeblock));
-        memcpy(cptr, &tmp, sizeof(tmp));
-    }
-
-    rec = (Record) xmalloc(sizeof(*rec));
-    rec->sysno = sysno;
-    memcpy(&compression_method, p->tmp_buf + sizeof(zint) + sizeof(short),
-           sizeof(compression_method));
-    in_buf = p->tmp_buf + sizeof(zint) + sizeof(short) + sizeof(char);
-    in_size = entry.size - sizeof(short) - sizeof(char);
-    switch (compression_method)
-    {
-    case REC_COMPRESS_BZIP2:
-#if HAVE_BZLIB_H
-       bz_size = entry.size * 20 + 100;
-       while (1)
-       {
-           bz_buf = (char *) xmalloc(bz_size);
-#ifdef BZ_CONFIG_ERROR
-           i = BZ2_bzBuffToBuffDecompress
-#else
-           i = bzBuffToBuffDecompress
-#endif
-                (bz_buf, &bz_size, in_buf, in_size, 0, 0);
-           yaz_log(YLOG_LOG, "decompress %5d %5d", in_size, bz_size);
-           if (i == BZ_OK)
-               break;
-           yaz_log(YLOG_LOG, "failed");
-           xfree(bz_buf);
-            bz_size *= 2;
-       }
-       in_buf = bz_buf;
-       in_size = bz_size;
-#else
-       yaz_log(YLOG_FATAL, "cannot decompress record(s) in BZIP2 format");
-       return 0;
-#endif
-       break;
-    case REC_COMPRESS_NONE:
-       break;
-    }
-    for (i = 0; i<REC_NO_INFO; i++)
-       rec->info[i] = 0;
-
-    nptr = in_buf;                /* skip ref count */
-    while (nptr < in_buf + in_size)
-    {
-       zint this_sysno;
-       int len;
-       rec_decode_zint(&this_sysno, (unsigned char *) nptr, &len);
-       nptr += len;
-
-       for (i = 0; i < REC_NO_INFO; i++)
-       {
-           unsigned int this_size;
-           rec_decode_unsigned(&this_size, (unsigned char *) nptr, &len);
-           nptr += len;
-
-           if (this_size == 0)
-               continue;
-           rec->size[i] = this_size-1;
-
-           if (rec->size[i])
-           {
-               rec->info[i] = nptr;
-               nptr += rec->size[i];
-           }
-           else
-               rec->info[i] = NULL;
-       }
-       if (this_sysno == rec_sysno_to_int(sysno))
-           break;
-    }
-    for (i = 0; i<REC_NO_INFO; i++)
-    {
-       if (rec->info[i] && rec->size[i])
-       {
-           char *np = xmalloc(rec->size[i]+1);
-           memcpy(np, rec->info[i], rec->size[i]);
-            np[rec->size[i]] = '\0';
-           rec->info[i] = np;
-       }
-       else
-       {
-           assert(rec->info[i] == 0);
-           assert(rec->size[i] == 0);
-       }
-    }
-    xfree(bz_buf);
-    if (rec_cache_insert(p, rec, recordFlagNop) != ZEBRA_OK)
-       return 0;
-    return rec;
-}
-
-Record rec_get(Records p, zint sysno)
-{
-    Record rec;
-    zebra_mutex_lock(&p->mutex);
-
-    rec = rec_get_int(p, sysno);
-    zebra_mutex_unlock(&p->mutex);
-    return rec;
-}
-
-Record rec_get_root(Records p)
-{
-    return rec_get(p, rec_sysno_to_ext(1));
-}
-
-static Record rec_new_int(Records p)
-{
-    int i;
-    zint sysno;
-    Record rec;
-
-    assert(p);
-    rec = (Record) xmalloc(sizeof(*rec));
-    if (1 || p->head.index_free == 0)
-        sysno = (p->head.index_last)++;
-    else
-    {
-        struct record_index_entry entry;
-
-        if (read_indx(p->recindex, p->head.index_free, &entry, sizeof(entry), 0) < 1)
-       {
-           xfree(rec);
-           return 0;
-       }
-        sysno = p->head.index_free;
-        p->head.index_free = entry.next;
-    }
-    (p->head.no_records)++;
-    rec->sysno = rec_sysno_to_ext(sysno);
-    for (i = 0; i < REC_NO_INFO; i++)
-    {
-        rec->info[i] = NULL;
-        rec->size[i] = 0;
-    }
-    rec_cache_insert(p, rec, recordFlagNew);
-    return rec;
-}
-
-Record rec_new(Records p)
-{
-    Record rec;
-    zebra_mutex_lock(&p->mutex);
-
-    rec = rec_new_int(p);
-    zebra_mutex_unlock(&p->mutex);
-    return rec;
-}
-
-ZEBRA_RES rec_del(Records p, Record *recpp)
-{
-    Record *recp;
-    ZEBRA_RES ret = ZEBRA_OK;
-
-    zebra_mutex_lock(&p->mutex);
-    (p->head.no_records)--;
-    if ((recp = rec_cache_lookup(p, (*recpp)->sysno, recordFlagDelete)))
-    {
-        rec_free(recp);
-        *recp = *recpp;
-    }
-    else
-    {
-        ret = rec_cache_insert(p, *recpp, recordFlagDelete);
-        rec_free(recpp);
-    }
-    zebra_mutex_unlock(&p->mutex);
-    *recpp = NULL;
-    return ret;
-}
-
-ZEBRA_RES rec_put(Records p, Record *recpp)
-{
-    Record *recp;
-    ZEBRA_RES ret = ZEBRA_OK;
-
-    zebra_mutex_lock(&p->mutex);
-    if ((recp = rec_cache_lookup(p, (*recpp)->sysno, recordFlagWrite)))
-    {
-        rec_free(recp);
-        *recp = *recpp;
-    }
-    else
-    {
-        ret = rec_cache_insert(p, *recpp, recordFlagWrite);
-        rec_free(recpp);
-    }
-    zebra_mutex_unlock(&p->mutex);
-    *recpp = NULL;
-    return ret;
-}
-
-void rec_free(Record *recpp)
-{
-    int i;
-
-    if (!*recpp)
-        return ;
-    for (i = 0; i < REC_NO_INFO; i++)
-        xfree((*recpp)->info[i]);
-    xfree(*recpp);
-    *recpp = NULL;
-}
-
-Record rec_cp(Record rec)
-{
-    Record n;
-    int i;
-
-    n = (Record) xmalloc(sizeof(*n));
-    n->sysno = rec->sysno;
-    for (i = 0; i < REC_NO_INFO; i++)
-        if (!rec->info[i])
-        {
-            n->info[i] = NULL;
-            n->size[i] = 0;
-        }
-        else
-        {
-            n->size[i] = rec->size[i];
-            n->info[i] = (char *) xmalloc(rec->size[i]+1);
-            memcpy(n->info[i], rec->info[i], rec->size[i]);
-            n->info[i][rec->size[i]] = '\0';
-        }
-    return n;
-}
-
-
-char *rec_strdup(const char *s, size_t *len)
-{
-    char *p;
-
-    if (!s)
-    {
-        *len = 0;
-        return NULL;
-    }
-    *len = strlen(s)+1;
-    p = (char *) xmalloc(*len);
-    strcpy(p, s);
-    return p;
-}
 
 /*
  * Local variables:
index fce488f..b92bea7 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: recindex.h,v 1.31 2007-01-15 20:08:25 adam Exp $
+/* $Id: recindex.h,v 1.32 2007-11-23 13:52:52 adam Exp $
    Copyright (C) 1995-2007
    Index Data ApS
 
@@ -107,6 +107,17 @@ enum {
     recInfo_sortKeys
 };
 
+typedef struct recindex *recindex_t;
+
+recindex_t recindex_open(BFiles bfs, int rw);
+void recindex_close(recindex_t p);
+int recindex_read_head(recindex_t p, void *buf);
+const char *recindex_get_fname(recindex_t p);
+ZEBRA_RES recindex_write_head(recindex_t p, const void *buf, size_t len);
+int recindex_read_indx(recindex_t p, zint sysno, void *buf, int itemsize, 
+                       int ignoreError);
+void recindex_write_indx(recindex_t p, zint sysno, void *buf, int itemsize);
+
 YAZ_END_CDECL
 #endif
 /*
index 13452b4..a38ceb4 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: recindxp.h,v 1.22 2007-11-23 13:11:08 adam Exp $
+/* $Id: recindxp.h,v 1.23 2007-11-23 13:52:52 adam Exp $
    Copyright (C) 1995-2007
    Index Data ApS
 
@@ -30,13 +30,6 @@ YAZ_BEGIN_CDECL
 #define REC_HEAD_MAGIC "recindex"
 #define REC_VERSION 5
 
-struct recindex {
-    char *index_fname;
-    BFile index_BFile;
-};
-
-typedef struct recindex *recindex_t;
-
 struct records_info {
     int rw;
     int compression_method;
diff --git a/index/records.c b/index/records.c
new file mode 100644 (file)
index 0000000..4a24e35
--- /dev/null
@@ -0,0 +1,934 @@
+/* $Id: records.c,v 1.1 2007-11-23 13:52:52 adam Exp $
+   Copyright (C) 1995-2007
+   Index Data ApS
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+*/
+
+/*
+ *  Format of first block
+ *      next       (8 bytes)
+ *      ref_count  (2 bytes)
+ *      block      (500 bytes)
+ *
+ *  Format of subsequent blocks 
+ *      next  (8 bytes)
+ *      block (502 bytes)
+ *
+ *  Format of each record
+ *      sysno
+ *      (length, data) - pairs
+ *      length = 0 if same as previous
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+
+#include <yaz/yaz-util.h>
+#include "recindxp.h"
+
+#if HAVE_BZLIB_H
+#include <bzlib.h>
+#endif
+
+/*
+ * Sysno mapping mode.  Change the argument of the #if below:
+ * 1 = normal operation, 0 = sysno testing.
+ */
+#if 1
+/* Normal mode: the offset is 0, so sysnos are not converted (no testing) */
+#define FAKE_OFFSET 0
+#define USUAL_RANGE 6000000000LL
+
+#else
+/* Use a fake > 2^32 offset so we can test for proper 64-bit handling */
+#define FAKE_OFFSET 6000000000LL
+#define USUAL_RANGE 2000000000LL
+#endif
+
+/*
+ * Convert an internal sysno to its external form by adding FAKE_OFFSET.
+ * The internal value must lie in 0..USUAL_RANGE.
+ */
+static zint rec_sysno_to_ext(zint sysno)
+{
+    zint external;
+
+    assert(sysno >= 0 && sysno <= USUAL_RANGE);
+    external = sysno + FAKE_OFFSET;
+    return external;
+}
+
+/*
+ * Convert an external sysno back to its internal form by subtracting
+ * FAKE_OFFSET.  The external value must lie in
+ * FAKE_OFFSET..FAKE_OFFSET + USUAL_RANGE.
+ */
+zint rec_sysno_to_int(zint sysno)
+{
+    zint internal;
+
+    assert(sysno >= FAKE_OFFSET && sysno <= FAKE_OFFSET + USUAL_RANGE);
+    internal = sysno - FAKE_OFFSET;
+    return internal;
+}
+
+/*
+ * Make sure p->tmp_buf can hold `size' bytes plus 2048 bytes of slack and
+ * at least two blocks of the largest block type.  When the buffer is too
+ * small it is replaced by a fresh one; the old contents are NOT kept.
+ */
+static void rec_tmp_expand(Records p, int size)
+{
+    /* nothing to do when both size requirements are already met */
+    if (p->tmp_size >= size + 2048 &&
+        p->tmp_size >= p->head.block_size[REC_BLOCK_TYPES-1]*2)
+        return;
+
+    xfree(p->tmp_buf);
+    p->tmp_size = size + (int)
+        (p->head.block_size[REC_BLOCK_TYPES-1])*2 + 2048;
+    p->tmp_buf = (char *) xmalloc(p->tmp_size);
+}
+
+/*
+ * Drop one reference to the block chain holding the record with internal
+ * sysno `sysno'.
+ *
+ * The index entry's `next' field packs the first block number (next / 8)
+ * and the block type (next & 7).  The first block of a chain stores a
+ * short reference count right after its next-pointer.  That count is
+ * decremented; if it is still non-zero the first block is written back
+ * and the chain is kept.  Otherwise every block of the chain is pushed
+ * onto the free list of its block type, block_used is decremented per
+ * block and the entry size is subtracted from total_bytes.
+ *
+ * Returns ZEBRA_OK on success; ZEBRA_FAIL if the index entry or a block
+ * cannot be read, or a block cannot be written.
+ */
+static ZEBRA_RES rec_release_blocks(Records p, zint sysno)
+{
+    struct record_index_entry entry;
+    zint freeblock;
+    char block_and_ref[sizeof(zint) + sizeof(short)];
+    int dst_type;
+    int first = 1;
+
+    if (recindex_read_indx(p->recindex, sysno, &entry, sizeof(entry), 1) != 1)
+        return ZEBRA_FAIL;
+
+    /* split entry.next into block type (low 3 bits) and block number */
+    freeblock = entry.next;
+    assert(freeblock > 0);
+    dst_type = CAST_ZINT_TO_INT(freeblock & 7);
+    assert(dst_type < REC_BLOCK_TYPES);
+    freeblock = freeblock / 8;
+    while (freeblock)
+    {
+        /* first block: next-pointer + ref count; later blocks: next only */
+        if (bf_read(p->data_BFile[dst_type], freeblock, 0,
+                    first ? sizeof(block_and_ref) : sizeof(zint),
+                    block_and_ref) != 1)
+        {
+            yaz_log(YLOG_FATAL|YLOG_ERRNO, "read in rec_del_single");
+           return ZEBRA_FAIL;
+        }
+       if (first)
+       {
+           short ref;
+           memcpy(&ref, block_and_ref + sizeof(freeblock), sizeof(ref));
+           --ref;
+           memcpy(block_and_ref + sizeof(freeblock), &ref, sizeof(ref));
+           if (ref)
+           {
+               /* chain is still referenced: store the new count and stop */
+               if (bf_write(p->data_BFile[dst_type], freeblock, 0,
+                             sizeof(block_and_ref), block_and_ref))
+               {
+                   yaz_log(YLOG_FATAL|YLOG_ERRNO, "write in rec_del_single");
+                   return ZEBRA_FAIL;
+               }
+               return ZEBRA_OK;
+           }
+           first = 0;
+       }
+       
+        /* push this block on the free list: it now links to the old head */
+        if (bf_write(p->data_BFile[dst_type], freeblock, 0, sizeof(freeblock),
+                      &p->head.block_free[dst_type]))
+        {
+            yaz_log(YLOG_FATAL|YLOG_ERRNO, "write in rec_del_single");
+           return ZEBRA_FAIL;
+        }
+        p->head.block_free[dst_type] = freeblock;
+        /* continue with the block this one pointed to before it was freed */
+        memcpy(&freeblock, block_and_ref, sizeof(freeblock));
+
+        p->head.block_used[dst_type]--;
+    }
+    p->head.total_bytes -= entry.size;
+    return ZEBRA_OK;
+}
+
+/*
+ * Delete one record: release its data blocks and turn its index entry
+ * into a free-list entry (size 0, next = previous head of the index free
+ * list).  Returns ZEBRA_FAIL when the blocks cannot be released.
+ */
+static ZEBRA_RES rec_delete_single(Records p, Record rec)
+{
+    struct record_index_entry entry;
+    const zint sysno = rec_sysno_to_int(rec->sysno);
+
+    /* all data in entry must be reset, since it's written verbatim */
+    memset(&entry, '\0', sizeof(entry));
+
+    if (rec_release_blocks(p, sysno) != ZEBRA_OK)
+        return ZEBRA_FAIL;
+
+    /* chain the entry into the index free list */
+    entry.size = 0;
+    entry.next = p->head.index_free;
+    p->head.index_free = sysno;
+    recindex_write_indx(p->recindex, sysno, &entry, sizeof(entry));
+    return ZEBRA_OK;
+}
+
+/*
+ * Write the contents of p->tmp_buf to a chain of data blocks and point the
+ * index entries of all records listed in `sysnos' at that chain.
+ *
+ * p->tmp_buf must start with sizeof(zint) bytes reserved for the block
+ * link, followed by `size' bytes of data.  `sysnos' holds internal sysnos
+ * and is terminated by a value <= 0.  The block type is the largest one
+ * whose block_move threshold is reached by `size'.  Blocks are taken from
+ * the free list of that type when available, otherwise the file is
+ * extended.
+ *
+ * Returns ZEBRA_OK on success; ZEBRA_FAIL if a free block cannot be read
+ * or a data block cannot be written.
+ */
+static ZEBRA_RES rec_write_tmp_buf(Records p, int size, zint *sysnos)
+{
+    struct record_index_entry entry;
+    int no_written = 0;
+    char *cptr = p->tmp_buf;
+    zint block_prev = -1, block_free;
+    int dst_type = 0;
+    int i;
+
+    /* all data in entry must be reset, since it's written verbatim */
+    memset(&entry, '\0', sizeof(entry));
+
+    for (i = 1; i<REC_BLOCK_TYPES; i++)
+        if (size >= p->head.block_move[i])
+            dst_type = i;
+    while (no_written < size)
+    {
+        block_free = p->head.block_free[dst_type];
+        if (block_free)
+        {
+            /* pop the free list: the free block stores its successor */
+            if (bf_read(p->data_BFile[dst_type],
+                        block_free, 0, sizeof(*p->head.block_free),
+                        &p->head.block_free[dst_type]) != 1)
+            {
+                yaz_log(YLOG_FATAL|YLOG_ERRNO, "read in %s at free block "
+                        ZINT_FORMAT,
+                        p->data_fname[dst_type], block_free);
+                return ZEBRA_FAIL;
+            }
+        }
+        else
+            block_free = p->head.block_last[dst_type]++;
+        if (block_prev == -1)
+        {
+            /* first block of the chain: publish it in the index */
+            entry.next = block_free*8 + dst_type;
+            entry.size = size;
+            p->head.total_bytes += size;
+            while (*sysnos > 0)
+            {
+                recindex_write_indx(p->recindex, *sysnos, &entry,
+                                    sizeof(entry));
+                sysnos++;
+            }
+        }
+        else
+        {
+            /* link the previous block to this one and write it in full;
+               bf_write returns non-zero on failure */
+            memcpy(cptr, &block_free, sizeof(block_free));
+            if (bf_write(p->data_BFile[dst_type], block_prev, 0, 0, cptr))
+            {
+                yaz_log(YLOG_FATAL|YLOG_ERRNO, "write in %s at block "
+                        ZINT_FORMAT,
+                        p->data_fname[dst_type], block_prev);
+                return ZEBRA_FAIL;
+            }
+            cptr = p->tmp_buf + no_written;
+        }
+        block_prev = block_free;
+        no_written += CAST_ZINT_TO_INT(p->head.block_size[dst_type]) 
+            - sizeof(zint);
+        p->head.block_used[dst_type]++;
+    }
+    assert(block_prev != -1);
+    /* last block: link 0 terminates the chain; write only the used part */
+    block_free = 0;
+    memcpy(cptr, &block_free, sizeof(block_free));
+    if (bf_write(p->data_BFile[dst_type], block_prev, 0,
+                 sizeof(block_free) + (p->tmp_buf+size) - cptr, cptr))
+    {
+        yaz_log(YLOG_FATAL|YLOG_ERRNO, "write in %s at block "
+                ZINT_FORMAT,
+                p->data_fname[dst_type], block_prev);
+        return ZEBRA_FAIL;
+    }
+    return ZEBRA_OK;
+}
+
+/*
+ * Open the record system stored in `bfs'.
+ *
+ * The head is read through the record index.  When none exists yet a
+ * fresh head is built (and written if `rw' is set); otherwise magic and
+ * version of the stored head are verified.  The data files "recdA",
+ * "recdB", ... are opened, one per block type, and an empty record cache
+ * is set up.
+ *
+ * Returns the new handle.  On failure everything is torn down through
+ * rec_close, which leaves NULL to be returned.
+ */
+Records rec_open(BFiles bfs, int rw, int compression_method)
+{
+    Records p = (Records) xmalloc(sizeof(*p));
+    ZEBRA_RES ret = ZEBRA_OK;
+    int head_status;
+    int t;
+
+    memset(&p->head, '\0', sizeof(p->head));
+    p->compression_method = compression_method;
+    p->rw = rw;
+    p->tmp_size = 1024;
+    p->recindex = recindex_open(bfs, rw);
+    p->tmp_buf = (char *) xmalloc(p->tmp_size);
+
+    head_status = recindex_read_head(p->recindex, p->tmp_buf);
+    if (head_status == 0)
+    {
+        /* no head stored: start with an empty record system */
+        memcpy(p->head.magic, REC_HEAD_MAGIC, sizeof(p->head.magic));
+        sprintf(p->head.version, "%3d", REC_VERSION);
+        p->head.index_free = 0;
+        p->head.index_last = 1;
+        p->head.no_records = 0;
+        p->head.total_bytes = 0;
+        for (t = 0; t < REC_BLOCK_TYPES; t++)
+        {
+            p->head.block_free[t] = 0;
+            p->head.block_last[t] = 1;
+            p->head.block_used[t] = 0;
+        }
+        /* block sizes start at 128 and grow by a factor 4 per type */
+        p->head.block_size[0] = 128;
+        p->head.block_move[0] = 0;
+        for (t = 1; t < REC_BLOCK_TYPES; t++)
+        {
+            p->head.block_size[t] = p->head.block_size[t-1] * 4;
+            p->head.block_move[t] = p->head.block_size[t] * 24;
+        }
+        if (rw &&
+            recindex_write_head(p->recindex, &p->head, sizeof(p->head))
+            != ZEBRA_OK)
+            ret = ZEBRA_FAIL;
+    }
+    else if (head_status == 1)
+    {
+        int version;
+
+        /* head exists: take it over and validate magic and version */
+        memcpy(&p->head, p->tmp_buf, sizeof(p->head));
+        if (memcmp(p->head.magic, REC_HEAD_MAGIC, sizeof(p->head.magic)) != 0)
+        {
+            yaz_log(YLOG_FATAL, "file %s has bad format",
+                    recindex_get_fname(p->recindex));
+            ret = ZEBRA_FAIL;
+        }
+        version = atoi(p->head.version);
+        if (version != REC_VERSION)
+        {
+            yaz_log(YLOG_FATAL, "file %s is version %d, but version"
+                    " %d is required",
+                    recindex_get_fname(p->recindex), version, REC_VERSION);
+            ret = ZEBRA_FAIL;
+        }
+    }
+
+    /* name every data file first, so that all handles are NULL before any
+       open is attempted (rec_close relies on that) */
+    for (t = 0; t < REC_BLOCK_TYPES; t++)
+    {
+        char str[80];
+
+        sprintf(str, "recd%c", t + 'A');
+        p->data_fname[t] = (char *) xmalloc(strlen(str)+1);
+        strcpy(p->data_fname[t], str);
+        p->data_BFile[t] = NULL;
+    }
+    for (t = 0; t < REC_BLOCK_TYPES; t++)
+    {
+        p->data_BFile[t] = bf_open(bfs, p->data_fname[t],
+                                   CAST_ZINT_TO_INT(p->head.block_size[t]),
+                                   rw);
+        if (p->data_BFile[t])
+            continue;
+        yaz_log(YLOG_FATAL|YLOG_ERRNO, "bf_open %s", p->data_fname[t]);
+        ret = ZEBRA_FAIL;
+        break;
+    }
+
+    p->cache_max = 400;
+    p->cache_cur = 0;
+    p->record_cache = (struct record_cache_entry *)
+        xmalloc(sizeof(*p->record_cache)*p->cache_max);
+    zebra_mutex_init(&p->mutex);
+
+    if (ret == ZEBRA_FAIL)
+        rec_close(&p);
+    return p;
+}
+
+/*
+ * Encode `n' in a variable number of bytes, 7 bits per byte with the
+ * least significant group first.  Every byte but the last has its high
+ * bit set.  The number of bytes written to `buf' is stored in `*len'.
+ */
+static void rec_encode_unsigned(unsigned n, unsigned char *buf, int *len)
+{
+    int pos = 0;
+
+    for (; n > 127; n = n >> 7)
+        buf[pos++] = 128 + (n & 127);
+    buf[pos++] = n;
+    *len = pos;
+}
+
+/*
+ * Decode a value written by rec_encode_unsigned.  The result is stored in
+ * `*np' and the number of bytes consumed from `buf' in `*len'.
+ */
+static void rec_decode_unsigned(unsigned *np, unsigned char *buf, int *len)
+{
+    unsigned value = 0;
+    unsigned weight = 1;
+    int pos = 0;
+
+    /* bytes with the high bit set are followed by more bytes */
+    for (; buf[pos] > 127; pos++, weight = weight << 7)
+        value += weight*(buf[pos] & 127);
+    value += weight * buf[pos];
+    *len = pos + 1;
+    *np = value;
+}
+
+/*
+ * Encode the zint `n' in a variable number of bytes, 7 bits per byte with
+ * the least significant group first.  Every byte but the last has its
+ * high bit set.  The number of bytes written is stored in `*len'.
+ */
+static void rec_encode_zint(zint n, unsigned char *buf, int *len)
+{
+    int pos = 0;
+
+    for (; n > 127; n = n >> 7)
+        buf[pos++] = (unsigned) (128 + (n & 127));
+    buf[pos++] = (unsigned) n;
+    *len = pos;
+}
+
+/*
+ * Decode a value written by rec_encode_zint.  The result is stored in
+ * `*np' and the number of bytes consumed from `buf' in `*len'.
+ */
+static void rec_decode_zint(zint *np, unsigned char *buf, int *len)
+{
+    zint value = 0;
+    zint weight = 1;
+    int pos = 0;
+
+    /* bytes with the high bit set are followed by more bytes */
+    for (; buf[pos] > 127; pos++, weight = weight << 7)
+        value += weight*(buf[pos] & 127);
+    value += weight * buf[pos];
+    *len = pos + 1;
+    *np = value;
+}
+
+/*
+ * Append the serialized form of `rec' to the growing output buffer.
+ *
+ * Layout: the internal sysno, then one length code per info field:
+ *   1     - field is empty
+ *   0     - field is identical to the same field of `last_rec'
+ *   n + 1 - n bytes of field data follow
+ * The buffer (*out_buf, *out_size) is enlarged as needed and *out_offset
+ * is advanced past the written bytes.
+ */
+static void rec_cache_flush_block1(Records p, Record rec, Record last_rec,
+                                   char **out_buf, int *out_size,
+                                   int *out_offset)
+{
+    int field;
+    int len;
+
+    for (field = 0; field < REC_NO_INFO; field++)
+    {
+        unsigned code;
+        int has_payload = 0;
+
+        /* make room for the field data plus 20 bytes for the encoded
+           sysno and length code */
+        if (*out_offset + CAST_ZINT_TO_INT(rec->size[field]) + 20 > *out_size)
+        {
+            int grown_size = *out_offset + rec->size[field] + 65536;
+            char *grown = (char *) xmalloc(grown_size);
+
+            if (*out_offset)
+                memcpy(grown, *out_buf, *out_offset);
+            xfree(*out_buf);
+            *out_buf = grown;
+            *out_size = grown_size;
+        }
+        if (field == 0)
+        {
+            /* the sysno leads the record */
+            rec_encode_zint(rec_sysno_to_int(rec->sysno),
+                            (unsigned char *) *out_buf + *out_offset, &len);
+            *out_offset += len;
+        }
+
+        if (rec->size[field] == 0)
+            code = 1;
+        else if (last_rec && rec->size[field] == last_rec->size[field] &&
+                 !memcmp(rec->info[field], last_rec->info[field],
+                         rec->size[field]))
+            code = 0;
+        else
+        {
+            code = rec->size[field]+1;
+            has_payload = 1;
+        }
+        rec_encode_unsigned(code, (unsigned char *) *out_buf + *out_offset,
+                            &len);
+        *out_offset += len;
+
+        if (has_payload)
+        {
+            memcpy(*out_buf + *out_offset, rec->info[field],
+                   rec->size[field]);
+            *out_offset += rec->size[field];
+        }
+    }
+}
+
+/*
+ * Write all pending cache entries except the last `saveCount' ones.
+ *
+ * New and modified records are serialized together into one buffer
+ * (modified ones release their old blocks first), deleted records are
+ * removed right away.  The buffer is optionally BZIP2 compressed,
+ * prefixed with the reference count and compression method, and written
+ * as one block chain shared by all serialized records.
+ *
+ * Returns ZEBRA_FAIL if any step failed; processing continues regardless.
+ */
+static ZEBRA_RES rec_write_multiple(Records p, int saveCount)
+{
+    ZEBRA_RES ret = ZEBRA_OK;
+    int i;
+    short ref_count = 0;
+    char compression_method;
+    Record last_rec = 0;
+    int out_size = 1000;
+    int out_offset = 0;
+    char *out_buf = (char *) xmalloc(out_size);
+    zint *sysnos = (zint *) xmalloc(sizeof(*sysnos) * (p->cache_cur + 1));
+    zint *sysnop = sysnos;
+    const int flush_count = p->cache_cur - saveCount;
+    /* the data follows block link, reference count and method byte */
+    const size_t data_off = sizeof(zint) + sizeof(short) + sizeof(char);
+
+    for (i = 0; i < flush_count; i++)
+    {
+        struct record_cache_entry *e = &p->record_cache[i];
+
+        if (e->flag == recordFlagDelete)
+        {
+            if (rec_delete_single(p, e->rec) != ZEBRA_OK)
+                ret = ZEBRA_FAIL;
+            e->flag = recordFlagNop;
+            continue;
+        }
+        if (e->flag != recordFlagNew && e->flag != recordFlagWrite)
+            continue;
+
+        /* a rewritten record gives up its old blocks first */
+        if (e->flag == recordFlagWrite &&
+            rec_release_blocks(p, rec_sysno_to_int(e->rec->sysno))
+            != ZEBRA_OK)
+            ret = ZEBRA_FAIL;
+
+        rec_cache_flush_block1(p, e->rec, last_rec, &out_buf,
+                               &out_size, &out_offset);
+        *sysnop++ = rec_sysno_to_int(e->rec->sysno);
+        ref_count++;
+        e->flag = recordFlagNop;
+        last_rec = e->rec;
+    }
+
+    /* terminate the sysno list */
+    *sysnop = -1;
+    if (ref_count)
+    {
+        unsigned int csize = 0;  /* indicate compression "not performed yet" */
+
+        compression_method = p->compression_method;
+        if (compression_method == REC_COMPRESS_BZIP2)
+        {
+#if HAVE_BZLIB_H       
+            csize = out_offset + (out_offset >> 6) + 620;
+            rec_tmp_expand(p, csize);
+#ifdef BZ_CONFIG_ERROR
+            i = BZ2_bzBuffToBuffCompress 
+#else
+            i = bzBuffToBuffCompress 
+#endif
+                (p->tmp_buf + data_off,
+                 &csize, out_buf, out_offset, 1, 0, 30);
+            if (i != BZ_OK)
+            {
+                yaz_log(YLOG_WARN, "bzBuffToBuffCompress error code=%d", i);
+                csize = 0;
+            }
+            yaz_log(YLOG_LOG, "compress %4d %5d %5d", ref_count, out_offset,
+                    csize);
+#endif
+        }
+        if (!csize)  
+        {
+            /* either no compression or compression not supported ... */
+            csize = out_offset;
+            rec_tmp_expand(p, csize);
+            memcpy(p->tmp_buf + data_off, out_buf, out_offset);
+            compression_method = REC_COMPRESS_NONE;
+        }
+        memcpy(p->tmp_buf + sizeof(zint), &ref_count, sizeof(ref_count));
+        memcpy(p->tmp_buf + sizeof(zint)+sizeof(short),
+               &compression_method, sizeof(compression_method));
+
+        /* -------- compression */
+        if (rec_write_tmp_buf(p, csize + sizeof(short) + sizeof(char), sysnos)
+            != ZEBRA_OK)
+            ret = ZEBRA_FAIL;
+    }
+    xfree(out_buf);
+    xfree(sysnos);
+    return ret;
+}
+
+/*
+ * Write out the cache and drop all entries except the last `saveCount'
+ * ones, which are moved to the front.  A `saveCount' that would keep the
+ * whole cache is treated as 0.  Returns the result of the write.
+ */
+static ZEBRA_RES rec_cache_flush(Records p, int saveCount)
+{
+    ZEBRA_RES ret;
+    int src, dst;
+    int victims;
+
+    if (saveCount >= p->cache_cur)
+        saveCount = 0;
+
+    ret = rec_write_multiple(p, saveCount);
+
+    victims = p->cache_cur - saveCount;
+    for (src = 0; src < victims; src++)
+        rec_free(&p->record_cache[src].rec);
+
+    /* src now points at the first surviving entry */
+    for (dst = 0; dst < saveCount; dst++, src++)
+        p->record_cache[dst] = p->record_cache[src];
+    p->cache_cur = saveCount;
+    return ret;
+}
+
+/*
+ * Find the cache slot holding the record with (external) sysno `sysno'.
+ * When `flag' is not recordFlagNop and the slot is still marked
+ * recordFlagNop, the slot takes over `flag'.  Returns the address of the
+ * slot's record pointer, or NULL when the record is not cached.
+ */
+static Record *rec_cache_lookup(Records p, zint sysno,
+                                enum recordCacheFlag flag)
+{
+    struct record_cache_entry *e = p->record_cache;
+    struct record_cache_entry *end = e + p->cache_cur;
+
+    for (; e < end; e++)
+    {
+        if (e->rec->sysno != sysno)
+            continue;
+        if (e->flag == recordFlagNop && flag != recordFlagNop)
+            e->flag = flag;
+        return &e->rec;
+    }
+    return NULL;
+}
+
+/*
+ * Add a copy of `rec' to the cache with state `flag'.
+ *
+ * The cache is flushed first (keeping its last entry) when it is full or
+ * when the cached records hold more than 90000 bytes of info data.
+ * Returns the result of that flush, ZEBRA_OK when none was needed; the
+ * record is inserted either way.
+ */
+static ZEBRA_RES rec_cache_insert(Records p, Record rec, enum recordCacheFlag flag)
+{
+    ZEBRA_RES ret = ZEBRA_OK;
+    struct record_cache_entry *slot;
+    int must_flush = 0;
+
+    if (p->cache_cur == p->cache_max)
+        must_flush = 1;
+    else if (p->cache_cur > 0)
+    {
+        int used = 0;
+        int i, j;
+
+        for (i = 0; i < p->cache_cur; i++)
+        {
+            Record r = p->record_cache[i].rec;
+            for (j = 0; j < REC_NO_INFO; j++)
+                used += r->size[j];
+        }
+        must_flush = (used > 90000);
+    }
+    if (must_flush)
+        ret = rec_cache_flush(p, 1);
+    assert(p->cache_cur < p->cache_max);
+
+    slot = &p->record_cache[p->cache_cur];
+    p->cache_cur++;
+    slot->flag = flag;
+    slot->rec = rec_cp(rec);
+    return ret;
+}
+
+/*
+ * Close the record system: flush the cache, write the head when opened
+ * for writing, close the index and all data files and free the handle.
+ * `*pp' is set to NULL.  A NULL handle is accepted and yields ZEBRA_OK.
+ * Returns ZEBRA_FAIL when the flush or the head write failed.
+ */
+ZEBRA_RES rec_close(Records *pp)
+{
+    ZEBRA_RES ret = ZEBRA_OK;
+    Records p = *pp;
+    int t;
+
+    if (p == NULL)
+        return ret;
+
+    zebra_mutex_destroy(&p->mutex);
+    if (rec_cache_flush(p, 0) != ZEBRA_OK)
+        ret = ZEBRA_FAIL;
+
+    xfree(p->record_cache);
+
+    if (p->rw &&
+        recindex_write_head(p->recindex, &p->head, sizeof(p->head))
+        != ZEBRA_OK)
+        ret = ZEBRA_FAIL;
+
+    recindex_close(p->recindex);
+
+    for (t = 0; t < REC_BLOCK_TYPES; t++)
+    {
+        /* a data file may be unopened when rec_open failed half-way */
+        if (p->data_BFile[t] != NULL)
+            bf_close(p->data_BFile[t]);
+        xfree(p->data_fname[t]);
+    }
+    xfree(p->tmp_buf);
+    xfree(p);
+    *pp = NULL;
+    return ret;
+}
+
+/*
+ * Fetch the record with external sysno `sysno'; rec_get calls this with
+ * p->mutex locked.
+ *
+ * A cached record is returned as a copy.  Otherwise the record's block
+ * chain is read into p->tmp_buf, decompressed if needed, and the
+ * (sysno, fields) entries are decoded until the wanted sysno is met.
+ * Fields encoded as "same as previous" keep the value of the preceding
+ * record in the chain.  The decoded record is also put into the cache.
+ *
+ * Returns a heap-allocated record (release it with rec_free), or NULL
+ * when the record does not exist, is deleted, cannot be read or
+ * decompressed, or the cache flush triggered by the insertion failed.
+ */
+static Record rec_get_int(Records p, zint sysno)
+{
+    int i, in_size, r;
+    Record rec, *recp;
+    struct record_index_entry entry;
+    zint freeblock;
+    int dst_type;
+    char *nptr, *cptr;
+    char *in_buf = 0;
+    char *bz_buf = 0;
+#if HAVE_BZLIB_H
+    unsigned int bz_size;
+#endif
+    char compression_method;
+
+    assert(sysno > 0);
+    assert(p);
+
+    if ((recp = rec_cache_lookup(p, sysno, recordFlagNop)))
+        return rec_cp(*recp);
+
+    if (recindex_read_indx(p->recindex, rec_sysno_to_int(sysno), &entry,
+                           sizeof(entry), 1) < 1)
+        return NULL;       /* record is not there! */
+
+    if (!entry.size)
+        return NULL;       /* record is deleted */
+
+    /* entry.next packs block number (next / 8) and block type (next & 7) */
+    dst_type = (int) (entry.next & 7);
+    assert(dst_type < REC_BLOCK_TYPES);
+    freeblock = entry.next / 8;
+
+    assert(freeblock > 0);
+
+    rec_tmp_expand(p, entry.size);
+
+    cptr = p->tmp_buf;
+    r = bf_read(p->data_BFile[dst_type], freeblock, 0, 0, cptr);
+    if (r < 0)
+        return 0;
+    memcpy(&freeblock, cptr, sizeof(freeblock));
+
+    while (freeblock)
+    {
+        zint tmp;
+
+        /* the next block is read over the tail of the data read so far:
+           its link would clobber sizeof(zint) data bytes, so save them
+           and put them back once the link has been extracted */
+        cptr += p->head.block_size[dst_type] - sizeof(freeblock);
+
+        memcpy(&tmp, cptr, sizeof(tmp));
+        r = bf_read(p->data_BFile[dst_type], freeblock, 0, 0, cptr);
+        if (r < 0)
+            return 0;
+        memcpy(&freeblock, cptr, sizeof(freeblock));
+        memcpy(cptr, &tmp, sizeof(tmp));
+    }
+
+    rec = (Record) xmalloc(sizeof(*rec));
+    rec->sysno = sysno;
+    memcpy(&compression_method, p->tmp_buf + sizeof(zint) + sizeof(short),
+           sizeof(compression_method));
+    in_buf = p->tmp_buf + sizeof(zint) + sizeof(short) + sizeof(char);
+    in_size = entry.size - sizeof(short) - sizeof(char);
+    switch (compression_method)
+    {
+    case REC_COMPRESS_BZIP2:
+#if HAVE_BZLIB_H
+        bz_size = entry.size * 20 + 100;
+        while (1)
+        {
+            bz_buf = (char *) xmalloc(bz_size);
+#ifdef BZ_CONFIG_ERROR
+            i = BZ2_bzBuffToBuffDecompress
+#else
+            i = bzBuffToBuffDecompress
+#endif
+                (bz_buf, &bz_size, in_buf, in_size, 0, 0);
+            yaz_log(YLOG_LOG, "decompress %5d %5d", in_size, bz_size);
+            if (i == BZ_OK)
+                break;
+            yaz_log(YLOG_LOG, "failed");
+            xfree(bz_buf);
+            bz_buf = 0;
+            if (i != BZ_OUTBUFF_FULL)
+            {
+                /* a larger output buffer cannot cure this error, so
+                   retrying would loop forever */
+                yaz_log(YLOG_WARN, "bzBuffToBuffDecompress error code=%d", i);
+                xfree(rec);
+                return 0;
+            }
+            bz_size *= 2;
+        }
+        in_buf = bz_buf;
+        in_size = bz_size;
+#else
+        yaz_log(YLOG_FATAL, "cannot decompress record(s) in BZIP2 format");
+        xfree(rec);        /* no info fields are attached yet */
+        return 0;
+#endif
+        break;
+    case REC_COMPRESS_NONE:
+        break;
+    }
+    /* start from empty fields, so that nothing uninitialised is read when
+       the data holds no value for a field */
+    for (i = 0; i<REC_NO_INFO; i++)
+    {
+        rec->info[i] = 0;
+        rec->size[i] = 0;
+    }
+
+    nptr = in_buf;                /* skip ref count */
+    while (nptr < in_buf + in_size)
+    {
+        zint this_sysno;
+        int len;
+        rec_decode_zint(&this_sysno, (unsigned char *) nptr, &len);
+        nptr += len;
+
+        for (i = 0; i < REC_NO_INFO; i++)
+        {
+            unsigned int this_size;
+            rec_decode_unsigned(&this_size, (unsigned char *) nptr, &len);
+            nptr += len;
+
+            /* 0: same as previous record, keep what we have */
+            if (this_size == 0)
+                continue;
+            rec->size[i] = this_size-1;
+
+            if (rec->size[i])
+            {
+                rec->info[i] = nptr;
+                nptr += rec->size[i];
+            }
+            else
+                rec->info[i] = NULL;
+        }
+        if (this_sysno == rec_sysno_to_int(sysno))
+            break;
+    }
+    /* the fields still point into the input buffer: make private,
+       NUL-terminated copies */
+    for (i = 0; i<REC_NO_INFO; i++)
+    {
+        if (rec->info[i] && rec->size[i])
+        {
+            char *np = xmalloc(rec->size[i]+1);
+            memcpy(np, rec->info[i], rec->size[i]);
+            np[rec->size[i]] = '\0';
+            rec->info[i] = np;
+        }
+        else
+        {
+            assert(rec->info[i] == 0);
+            assert(rec->size[i] == 0);
+        }
+    }
+    xfree(bz_buf);
+    if (rec_cache_insert(p, rec, recordFlagNop) != ZEBRA_OK)
+    {
+        /* the cache holds its own copy; ours must not leak */
+        rec_free(&rec);
+        return 0;
+    }
+    return rec;
+}
+
+/*
+ * Fetch the record with external sysno `sysno' while holding p->mutex.
+ * Returns whatever rec_get_int returns.
+ */
+Record rec_get(Records p, zint sysno)
+{
+    Record found;
+
+    zebra_mutex_lock(&p->mutex);
+    found = rec_get_int(p, sysno);
+    zebra_mutex_unlock(&p->mutex);
+
+    return found;
+}
+
+/* Fetch the root record, i.e. the record with internal sysno 1. */
+Record rec_get_root(Records p)
+{
+    const zint root_sysno = rec_sysno_to_ext(1);
+
+    return rec_get(p, root_sysno);
+}
+
+/*
+ * Create an empty record with a fresh sysno and register it in the cache
+ * as new.  The sysno is always taken from head.index_last: the constant
+ * `1 ||' in the condition switches off reuse of entries on the index
+ * free list.  Returns the new record.
+ */
+static Record rec_new_int(Records p)
+{
+    Record rec;
+    zint sysno;
+    int k;
+
+    assert(p);
+    rec = (Record) xmalloc(sizeof(*rec));
+    if (1 || p->head.index_free == 0)
+    {
+        sysno = p->head.index_last;
+        p->head.index_last++;
+    }
+    else
+    {
+        struct record_index_entry entry;
+
+        /* take the first entry of the index free list */
+        if (recindex_read_indx(p->recindex, p->head.index_free, &entry,
+                               sizeof(entry), 0) < 1)
+        {
+            xfree(rec);
+            return 0;
+        }
+        sysno = p->head.index_free;
+        p->head.index_free = entry.next;
+    }
+    p->head.no_records++;
+    rec->sysno = rec_sysno_to_ext(sysno);
+    for (k = 0; k < REC_NO_INFO; k++)
+    {
+        rec->size[k] = 0;
+        rec->info[k] = NULL;
+    }
+    rec_cache_insert(p, rec, recordFlagNew);
+    return rec;
+}
+
+/*
+ * Create a new, empty record while holding p->mutex.
+ * Returns whatever rec_new_int returns.
+ */
+Record rec_new(Records p)
+{
+    Record created;
+
+    zebra_mutex_lock(&p->mutex);
+    created = rec_new_int(p);
+    zebra_mutex_unlock(&p->mutex);
+
+    return created;
+}
+
+/*
+ * Schedule the record `*recpp' for deletion and decrement the record
+ * count.  If the record is cached, the cache slot takes over `*recpp'
+ * itself; otherwise a copy is inserted into the cache and `*recpp' is
+ * freed.  `*recpp' is NULL on return in both cases.
+ * Returns the result of the cache insertion, ZEBRA_OK when none was made.
+ */
+ZEBRA_RES rec_del(Records p, Record *recpp)
+{
+    ZEBRA_RES ret = ZEBRA_OK;
+    Record *slot;
+
+    zebra_mutex_lock(&p->mutex);
+    p->head.no_records--;
+    slot = rec_cache_lookup(p, (*recpp)->sysno, recordFlagDelete);
+    if (slot == NULL)
+    {
+        ret = rec_cache_insert(p, *recpp, recordFlagDelete);
+        rec_free(recpp);
+    }
+    else
+    {
+        /* replace the cached record by the caller's */
+        rec_free(slot);
+        *slot = *recpp;
+    }
+    zebra_mutex_unlock(&p->mutex);
+    *recpp = NULL;
+    return ret;
+}
+
+/*
+ * Schedule the record `*recpp' for writing.  If the record is cached, the
+ * cache slot takes over `*recpp' itself; otherwise a copy is inserted
+ * into the cache and `*recpp' is freed.  `*recpp' is NULL on return in
+ * both cases.
+ * Returns the result of the cache insertion, ZEBRA_OK when none was made.
+ */
+ZEBRA_RES rec_put(Records p, Record *recpp)
+{
+    ZEBRA_RES ret = ZEBRA_OK;
+    Record *slot;
+
+    zebra_mutex_lock(&p->mutex);
+    slot = rec_cache_lookup(p, (*recpp)->sysno, recordFlagWrite);
+    if (slot == NULL)
+    {
+        ret = rec_cache_insert(p, *recpp, recordFlagWrite);
+        rec_free(recpp);
+    }
+    else
+    {
+        /* replace the cached record by the caller's */
+        rec_free(slot);
+        *slot = *recpp;
+    }
+    zebra_mutex_unlock(&p->mutex);
+    *recpp = NULL;
+    return ret;
+}
+
+/*
+ * Free the record `*recpp' together with all of its info fields and set
+ * `*recpp' to NULL.  Does nothing when `*recpp' is already NULL.
+ */
+void rec_free(Record *recpp)
+{
+    Record rec = *recpp;
+    int k;
+
+    if (rec == NULL)
+        return;
+    for (k = 0; k < REC_NO_INFO; k++)
+        xfree(rec->info[k]);
+    xfree(rec);
+    *recpp = NULL;
+}
+
+/*
+ * Return a deep copy of `rec'.  Every present info field is duplicated
+ * with one extra NUL byte appended; absent fields become NULL with
+ * size 0.
+ */
+Record rec_cp(Record rec)
+{
+    Record copy = (Record) xmalloc(sizeof(*copy));
+    int k;
+
+    copy->sysno = rec->sysno;
+    for (k = 0; k < REC_NO_INFO; k++)
+    {
+        if (rec->info[k] == NULL)
+        {
+            copy->size[k] = 0;
+            copy->info[k] = NULL;
+            continue;
+        }
+        copy->size[k] = rec->size[k];
+        copy->info[k] = (char *) xmalloc(rec->size[k]+1);
+        memcpy(copy->info[k], rec->info[k], rec->size[k]);
+        copy->info[k][rec->size[k]] = '\0';
+    }
+    return copy;
+}
+
+
+/*
+ * Duplicate the string `s'.  `*len' receives the size of the copy
+ * including the terminating NUL.  A NULL `s' yields NULL and a length
+ * of 0.
+ */
+char *rec_strdup(const char *s, size_t *len)
+{
+    char *copy = NULL;
+
+    if (s == NULL)
+        *len = 0;
+    else
+    {
+        *len = strlen(s)+1;
+        copy = (char *) xmalloc(*len);
+        memcpy(copy, s, *len);
+    }
+    return copy;
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+