-/* $Id: rset.h,v 1.24 2004-08-04 09:59:03 heikki Exp $
+/* $Id: rset.h,v 1.25 2004-08-06 09:43:03 heikki Exp $
Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
Index Data Aps
#define rset_forward(rs, fd, buf, indx, cmpfunc, untilbuf) \
(*(rs)->control->f_forward)((rs), (fd), (buf), (indx), (cmpfunc), (untilbuf))
-/* int rset_count(RSET rs); */
-#define rset_count(rs) (*(rs)->control->f_count)(rs)
+/* void rset_pos(RSET rs, RSFD fd, zint *current, zint *total); */
+#define rset_pos(rs,fd,cur,tot) \
+ (*(rs)->control->f_pos)( (fd),(cur),(tot))
/* int rset_read(RSET rs, void *buf, int *indx); */
#define rset_read(rs, fd, buf, indx) (*(rs)->control->f_read)((fd), (buf), indx)
-/* $Id: retrieve.c,v 1.22 2004-08-04 08:35:23 adam Exp $
+/* $Id: retrieve.c,v 1.23 2004-08-06 09:43:03 heikki Exp $
Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
Index Data Aps
if (!rec)
{
logf (LOG_DEBUG, "rec_get fail on sysno=" ZINT_FORMAT, sysno);
- *basenamep = 0;
+ *basenamep = 0;
return 14;
}
recordAttr = rec_init_attr (zh->reg->zei, rec);
-/* $Id: zsets.c,v 1.50 2004-08-04 08:35:24 adam Exp $
+/* $Id: zsets.c,v 1.51 2004-08-06 09:43:03 heikki Exp $
Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
Index Data Aps
void resultSetRank (ZebraHandle zh, ZebraSet zebraSet, RSET rset)
{
- int kno = 0;
+ zint kno = 0;
struct it_key key;
RSFD rfd;
int term_index, i;
struct rank_control *rc;
struct zset_sort_info *sort_info;
const char *rank_handler_name = res_get_def(zh->res, "rank", "rank-1");
+ zint cur,tot;
+ zint est=-2; /* -2 not done, -1 can't do, >0 actual estimate*/
+ zint esthits;
sort_info = zebraSet->sort_info;
sort_info->num_entries = 0;
void *handle =
(*rc->begin) (zh->reg, rank_class->class_handle, rset);
(zebraSet->hits)++;
+ esthits=atoi(res_get_def(zh->res,"estimatehits","0"));
+ if (!esthits) est=-1; /* can not do */
do
{
#if IT_KEY_NEW
psysno = this_sys;
}
(*rc->add) (handle, this_sys, term_index);
+ if ( (est==-2) && (zebraSet->hits==esthits))
+ { /* time to estimate the hits */
+     double f;
+     /* Seed the out-parameters so a pos handler that leaves them
+      * untouched is treated as "no position available". */
+     cur=0;
+     tot=-1;
+     rset_pos(rset,rfd,&cur,&tot);
+     /* Extrapolate only when the rset reports a usable total AND some
+      * progress: with cur==0 the fraction would be 0 and the division
+      * below would produce an infinite value whose conversion to zint
+      * is undefined. */
+     if (tot>0 && cur>0) {
+         f=(double)cur/(double)tot; /* fraction of the rset consumed */
+         est=(zint)(zebraSet->hits/f);
+         /* FIXME - round the guess to 3 digits */
+         logf(LOG_LOG, "Estimating hits (%s) "
+             ZINT_FORMAT"->%d"
+             "; "ZINT_FORMAT"->"ZINT_FORMAT,
+             rset->control->desc,
+             cur, zebraSet->hits,
+             tot,est);
+         zebraSet->hits=est;
+     }
+ }
}
- while (rset_read (rset, rfd, &key, &term_index));
+ while (rset_read (rset, rfd, &key, &term_index) && (est<0) );
+
score = (*rc->calc) (handle, psysno);
resultSetInsertRank (zh, sort_info, psysno, score, 'A');
(*rc->end) (zh->reg, handle);
rset->rset_terms[i]->flags,
rset->rset_terms[i]->count);
- yaz_log (LOG_LOG, "%d keys, %d distinct sysnos", kno, zebraSet->hits);
+ yaz_log (LOG_LOG, ZINT_FORMAT " keys, %d distinct sysnos",
+ kno, zebraSet->hits);
}
ZebraRankClass zebraRankLookup (ZebraHandle zh, const char *name)
-/* $Id: isamb.c,v 1.49 2004-08-04 09:59:03 heikki Exp $
+/* $Id: isamb.c,v 1.50 2004-08-06 09:43:03 heikki Exp $
Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
Index Data Aps
#if NEW_FORWARD == 1
-/*
-#undef ISAMB_DEBUB
-#define ISAMB_DEBUG 1
-*/
static int isamb_pp_on_right_node(ISAMB_PP pp, int level, const void *untilbuf)
{ /* looks one node higher to see if we should be on this node at all */
/* useful in backing off quickly, and in avoiding tail descends */
char *end=p->bytes+p->size;
char *cur=p->bytes+p->offset;
char *dst;
+ void *decodeClientData;
assert(p->offset <= p->size);
assert(cur <= end);
assert(p->leaf);
*current=0;
*total=0;
+ decodeClientData = (pp->isamb->method->codec.start)();
+
while(src < end) {
dst=dummybuf;
- (*pp->isamb->method->codec.decode)(p->decodeClientData,&dst, &src);
+ (*pp->isamb->method->codec.decode)(decodeClientData,&dst, &src);
assert(dst<(char*) dummybuf+100); /*FIXME */
(*total)++;
if (src<=cur)
(*current)++;
}
+#if ISAMB_DEBUG
logf(LOG_DEBUG, "isamb_pp_leaf_pos: cur="ZINT_FORMAT" tot="ZINT_FORMAT
" ofs=%d sz=%d lev=%d",
*current, *total, p->offset, p->size, pp->level);
+#endif
assert(src==end);
+ (pp->isamb->method->codec.stop)(decodeClientData);
}
static void isamb_pp_upper_pos( ISAMB_PP pp, zint *current, zint *total,
char *cur=p->bytes+p->offset;
zint item_size;
ISAMB_P child;
+
assert(level>=0);
assert(!p->leaf);
+
+#if ISAMB_DEBUG
logf(LOG_DEBUG,"isamb_pp_upper_pos at beginning l=%d "
"cur="ZINT_FORMAT" tot="ZINT_FORMAT
" ofs=%d sz=%d pos=" ZINT_FORMAT,
level, *current, *total, p->offset, p->size, p->pos);
+#endif
assert (p->offset <= p->size);
decode_ptr (&src, &child ); /* first child */
while(src < end) {
-/* $Id: rsbool.c,v 1.33 2004-08-04 09:59:03 heikki Exp $
+/* $Id: rsbool.c,v 1.34 2004-08-06 09:43:03 heikki Exp $
Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
Index Data Aps
#include <assert.h>
#include <zebrautl.h>
+#include <rset.h>
#include <rsbool.h>
#ifndef RSET_DEBUG
static int r_forward(RSET ct, RSFD rfd, void *buf, int *term_index,
int (*cmpfunc)(const void *p1, const void *p2),
const void *untilbuf);
-/* static void r_pos (RSFD rfd, int *current, int *total); */
+static void r_pos (RSFD rfd, zint *current, zint *total);
static int r_read_and (RSFD rfd, void *buf, int *term_index);
static int r_read_or (RSFD rfd, void *buf, int *term_index);
static int r_read_not (RSFD rfd, void *buf, int *term_index);
r_delete,
r_rewind,
r_forward, /* rset_default_forward, */
- rset_default_pos,
+ r_pos, /* rset_default_pos */
r_read_and,
r_write,
};
r_close,
r_delete,
r_rewind,
-#if 1
r_forward,
-#else
- rset_default_forward,
-#endif
- rset_default_pos,
+ r_pos,
r_read_or,
r_write,
};
r_delete,
r_rewind,
r_forward,
- rset_default_pos,
+ r_pos,
r_read_not,
r_write,
};
};
struct rset_bool_rfd {
+ zint hits;
RSFD rfd_l;
RSFD rfd_r;
int more_l;
rfd->next = info->rfd_list;
info->rfd_list = rfd;
rfd->info = info;
+ rfd->hits=0;
rfd->buf_l = xmalloc (info->key_size);
rfd->buf_r = xmalloc (info->key_size);
rset_rewind (info->rset_r, p->rfd_r);
p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l, &p->term_index_l);
p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r, &p->term_index_r);
+ p->hits=0;
}
static int r_forward (RSET ct, RSFD rfd, void *buf, int *term_index,
struct rset_bool_rfd *p = (struct rset_bool_rfd *) rfd;
struct rset_bool_info *info = p->info;
+ { zint cur,tot; r_pos(rfd, &cur, &tot); }
+
while (p->more_l || p->more_r)
{
int cmp;
key_logdump(LOG_DEBUG,buf);
(*info->log_item)(LOG_DEBUG, buf, "");
#endif
+ p->hits++;
return 1;
}
else if (cmp == -1)
rfd, p->more_l, p->more_r, cmp);
(*info->log_item)(LOG_DEBUG, buf, "");
#endif
+ p->hits++;
return 1;
}
else if (cmp > 1) /* cmp == 2 */
p->more_l, p->more_r, cmp);
(*info->log_item)(LOG_DEBUG, buf, "");
#endif
+ p->hits++;
return 1;
}
#else
rfd, p->more_l, p->more_r, cmp);
(*info->log_item)(LOG_DEBUG, buf, "");
#endif
+ p->hits++;
return 1;
}
else
rfd, p->more_l, p->more_r, cmp);
(*info->log_item)(LOG_DEBUG, buf, "");
#endif
+ p->hits++;
return 1;
}
#else
rfd, p->more_l, p->more_r, cmp);
(*info->log_item)(LOG_DEBUG, buf, "");
#endif
+ p->hits++;
return 1;
}
else
struct rset_bool_rfd *p = (struct rset_bool_rfd *) rfd;
struct rset_bool_info *info = p->info;
+ { zint cur,tot; r_pos(rfd, &cur, &tot); }
while (p->more_l || p->more_r)
{
int cmp;
p->more_l, p->more_r, cmp);
(*info->log_item)(LOG_DEBUG, buf, "");
#endif
+ p->hits++;
return 1;
}
else if (cmp > 0)
p->more_l, p->more_r, cmp);
(*info->log_item)(LOG_DEBUG, buf, "");
#endif
+ p->hits++;
return 1;
}
else
p->more_l, p->more_r, cmp);
(*info->log_item)(LOG_DEBUG, buf, "");
#endif
+ p->hits++;
return 1;
}
}
struct rset_bool_rfd *p = (struct rset_bool_rfd *) rfd;
struct rset_bool_info *info = p->info;
+ { zint cur,tot; r_pos(rfd, &cur, &tot); }
while (p->more_l || p->more_r)
{
int cmp;
*term_index = p->term_index_l;
p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l,
&p->term_index_l);
+ p->hits++;
return 1;
}
else if (cmp > 1)
- {
-#if 0
- p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r,
- &p->term_index_r);
-#else
- p->more_r = rset_forward(
- info->rset_r, p->rfd_r,
- p->buf_r, &p->term_index_r,
- (info->cmp), p->buf_l);
-#endif
- }
+ {
+ p->more_r = rset_forward(
+ info->rset_r, p->rfd_r,
+ p->buf_r, &p->term_index_r,
+ (info->cmp), p->buf_l);
+ }
else
{
memcpy (buf, p->buf_l, info->key_size);
return -1;
}
+/*
+ * r_pos: estimate the read position inside a boolean result set.
+ *
+ * rfd      reader handle; really a struct rset_bool_rfd.
+ * current  out: number of hits this reader has returned so far (p->hits).
+ * total    out: estimated total number of hits, or a negative value when
+ *          no estimate can be made.
+ *
+ * The positions of the left and right operand rsets are combined into one
+ * "fraction consumed" ratio, and the hits returned so far are extrapolated
+ * with it. A negative total from an operand is taken to mean that the
+ * operand cannot report a position.
+ */
+static void r_pos (RSFD rfd, zint *current, zint *total)
+{
+    struct rset_bool_rfd *p = (struct rset_bool_rfd *) rfd;
+    struct rset_bool_info *info = p->info;
+    /* Pre-set everything so an operand that does not touch its
+     * out-parameters reads as "position not available". */
+    zint lcur=0, ltot=-1;
+    zint rcur=0, rtot=-1;
+    double r;
+
+    rset_pos(info->rset_l, p->rfd_l, &lcur, &ltot);
+    rset_pos(info->rset_r, p->rfd_r, &rcur, &rtot);
+    if ( (rtot<0) && (ltot<0)) { /* no position */
+        *current=rcur; /* return same as you got */
+        *total=rtot;   /* probably -1 for not available */
+        return;        /* do not fall through and overwrite with 0/0 */
+    }
+    if ( rtot<0) { rtot=0; rcur=0;} /* if only one useful, use it */
+    if ( ltot<0) { ltot=0; lcur=0;}
+    if ( rtot+ltot == 0 ) { /* empty rset */
+        *current=0;
+        *total=0;
+        return;
+    }
+    r=(double)(lcur+rcur)/(double)(ltot+rtot); /* weighted average of l and r */
+    *current=p->hits;
+    if (r > 0.0)
+        *total=(zint)(0.5+*current/r);
+    else
+        *total=-1; /* operands report no progress yet: dividing by zero
+                      would give inf/NaN, so report "not available" */
+#if RSET_DEBUG
+    yaz_log(LOG_DEBUG,"bool_pos: (%s/%s) "ZINT_FORMAT"/"ZINT_FORMAT"= %0.4f ",
+        info->rset_l->control->desc, info->rset_r->control->desc,
+        *current, *total, r);
+#endif
+}
-/* $Id: rsisamb.c,v 1.11 2004-08-04 09:59:03 heikki Exp $
+/* $Id: rsisamb.c,v 1.12 2004-08-06 09:43:04 heikki Exp $
Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
Index Data Aps
struct rset_pp_info *pinfo = (struct rset_pp_info *) rfd;
assert(rfd);
isamb_pp_pos(pinfo->pt, current, total);
+#if RSET_DEBUG
+ logf(LOG_DEBUG,"isamb.r_pos returning "ZINT_FORMAT"/"ZINT_FORMAT,
+ *current, *total);
+#endif
}
static int r_read (RSFD rfd, void *buf, int *term_index)
struct rset_pp_info *pinfo = (struct rset_pp_info *) rfd;
int r;
*term_index = 0;
+
r = isamb_pp_read(pinfo->pt, buf);
if (r > 0)
{
-/* $Id: rstemp.c,v 1.38 2004-08-03 14:54:41 heikki Exp $
+/* $Id: rstemp.c,v 1.39 2004-08-06 09:43:04 heikki Exp $
Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003
Index Data Aps
/* static int r_count (RSET ct);*/
static int r_read (RSFD rfd, void *buf, int *term_index);
static int r_write (RSFD rfd, const void *buf);
+static void r_pos (RSFD rfd, zint *current, zint *total);
static const struct rset_control control =
{
r_delete,
r_rewind,
rset_default_forward,
- rset_default_pos,
+ r_pos,
r_read,
r_write,
};
size_t pos_buf; /* position of first byte in window */
size_t pos_border; /* position of last byte+1 in window */
int dirty; /* window is dirty */
- int hits; /* no of hits */
+ zint hits; /* no of hits */
char *temp_path;
int (*cmp)(const void *p1, const void *p2);
struct rset_temp_rfd *rfd_list;
int *countp;
void *buf;
size_t pos_cur; /* current position in set */
+ zint cur; /* number of the current hit */
};
static void *r_create(RSET ct, const struct rset_control *sel, void *parms)
info->pos_end = 0;
info->pos_buf = 0;
info->dirty = 0;
- info->hits = -1;
+ info->hits = 0;
info->cmp = temp_parms->cmp;
info->rfd_list = NULL;
((struct rset_temp_rfd *)rfd)->pos_cur = 0;
info->pos_buf = 0;
r_reread (rfd);
+ ((struct rset_temp_rfd *)rfd)->cur=0;
}
/*
memcpy (mrfd->buf, buf, mrfd->info->key_size);
(*mrfd->countp)++;
}
+ mrfd->cur++;
return 1;
}
mrfd->pos_cur = nc;
if (nc > info->pos_end)
info->pos_border = info->pos_end = nc;
+ info->hits++;
return 1;
}
+
+/*
+ * r_pos: report the position of a temp-rset reader.
+ *
+ * rfd      reader handle; really a struct rset_temp_rfd.
+ * current  out: the reader's hit counter (its `cur` field).
+ * total    out: the hit counter kept in the shared rset info (`hits`).
+ */
+static void r_pos (RSFD rfd, zint *current, zint *total)
+{
+    struct rset_temp_rfd *reader = (struct rset_temp_rfd *) rfd;
+    zint here = reader->cur;
+    zint all = reader->info->hits;
+
+    *current = here;
+    *total = all;
+}