1.4 versions, but is present in the 1.3 series. Bug #124.
+Added support for term hit counts. This was not in place for earlier
+1.4 versions, but is present in the 1.3 series. Bug #124.
+
Implemented the 'equivalent' directive for .chr-files.
Added zebra_get_bfs to get BFile handle for session.
-/* $Id: api.h,v 1.23 2005-05-09 10:16:12 adam Exp $
+/* $Id: api.h,v 1.24 2005-05-24 11:35:41 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
int num_setnames, char **setnames,
int *statuses);
+
+
+/** \brief Returns the number of terms that make up a result set.
+    \param zh Zebra session handle
+    \param setname name of the result set to inspect
+    \param num_terms receives the number of terms; set to 0 when the
+           result set cannot be found
+    \return ZEBRA_OK if the result set exists, ZEBRA_FAIL otherwise
+*/
+YAZ_EXPORT
+ZEBRA_RES zebra_result_set_term_no(ZebraHandle zh, const char *setname,
+ int *num_terms);
+
+/** \brief Returns information about one term of a result set.
+    \param zh Zebra session handle
+    \param setname name of the result set to inspect
+    \param no index of the term, from 0 up to (but not including) the
+           value reported by zebra_result_set_term_no
+    \param count if non-null, receives the hit count for the term
+    \param approx if non-null, receives the term's approximation flag
+    \param termbuf if non-null, receives the term as a NUL-terminated
+           string
+    \param termlen size of termbuf on entry; length of the stored term
+           on return. Only used when termbuf is non-null.
+    \return ZEBRA_OK if the result set exists and no is in range,
+            ZEBRA_FAIL otherwise
+*/
+YAZ_EXPORT
+ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname,
+ int no, zint *count, int *approx,
+ char *termbuf, size_t *termlen);
+
+
/**
\fn ZEBRA_RES zebra_scan(ZebraHandle zh, ODR stream, \
Z_AttributesPlusTerm *zapt, oid_value attributeset, \
-/* $Id: rset.h,v 1.51 2005-05-03 09:11:34 adam Exp $
+/* $Id: rset.h,v 1.52 2005-05-24 11:35:41 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
RSET rset; /* ptr to the rset this FD is opened to */
void *priv; /* private parameters for this type */
RSFD next; /* to keep lists of used/free rsfd's */
+ zint counted_items;
+ char *counted_buf;
};
int rset_default_forward(RSFD rfd, void *buf, TERMID *term,
const void *untilbuf);
+/** rset_default_read implements a generic read */
+int rset_default_read(RSFD rfd, void *buf, TERMID *term);
+
/** rset_get_no_terms is a getterms function for those that don't have any */
void rset_get_no_terms(RSET ct, TERMID *terms, int maxterms, int *curterm);
{
const struct rset_control *control;
struct rset_key_control *keycontrol;
- int count; /* reference count */
- void *priv; /* stuff private to the given type of rset */
- NMEM nmem; /* nibble memory for various allocs */
- char my_nmem; /* Should the nmem be destroyed with the rset? */
- /* 1 if created with it, 0 if passed from above */
+ int refcount; /* reference count */
+ void *priv; /* stuff private to the given type of rset */
+ NMEM nmem; /* nibble memory for various allocs */
+ char my_nmem; /* Should the nmem be destroyed with the rset? */
+ /* 1 if created with it, 0 if passed from above */
RSFD free_list; /* all rfd's allocated but not currently in use */
RSFD use_list; /* all rfd's in use */
- int scope; /* On what level do we count hits and compare them? */
- TERMID term; /* the term thing for ranking etc */
+ int scope; /* On what level do we count hits and compare them? */
+ TERMID term; /* the term thing for ranking etc */
+ int no_children;
+ RSET *children;
+ zint hits_limit;
+ zint hits_count;
+ zint hits_round;
+ int hits_approx;
} rset;
/* rset is a "virtual base class", which will never exist on its own
* all instances are rsets of some specific type, like rsisamb, or rsbool
*/
RSFD rfd_create_base(RSET rs);
-void rfd_delete_base(RSFD rfd);
int rfd_is_last(RSFD rfd);
RSET rset_create_base(const struct rset_control *sel,
NMEM nmem,
struct rset_key_control *kcontrol,
int scope,
- TERMID term);
+ TERMID term,
+ int no_children, RSET *children);
void rset_delete(RSET rs);
RSET rset_dup (RSET rs);
-
+void rset_close(RSFD rfd);
#define RSETF_READ 0
#define RSETF_WRITE 1
/* RSFD rset_open(RSET rs, int wflag); */
#define rset_open(rs, wflag) (*(rs)->control->f_open)((rs), (wflag))
-/* void rset_close(RSFD rfd); */
-#define rset_close(rfd) (*(rfd)->rset->control->f_close)(rfd)
-
/* int rset_forward(RSFD rfd, void *buf, TERMID term, void *untilbuf); */
#define rset_forward(rfd, buf, term, untilbuf) \
- (*(rfd)->rset->control->f_forward)((rfd),(buf),(term),(untilbuf))
+ rset_default_forward((rfd), (buf), (term), (untilbuf))
/* void rset_getterms(RSET ct, TERMID *terms, int maxterms, int *curterm); */
#define rset_getterms(ct, terms, maxterms, curterm) \
(*(rfd)->rset->control->f_pos)((rfd),(cur),(tot))
/* int rset_read(RSFD rfd, void *buf, TERMID term); */
-#define rset_read(rfd, buf, term) \
- (*(rfd)->rset->control->f_read)((rfd), (buf), (term))
+#define rset_read(rfd, buf, term) rset_default_read((rfd), (buf), (term))
/* int rset_write(RSFD rfd, const void *buf); */
#define rset_write(rfd, buf) (*(rfd)->rset->control->f_write)((rfd), (buf))
RSET rstemp_create(NMEM nmem, struct rset_key_control *kcontrol,
int scope, const char *temp_path, TERMID term);
-RSET rsnull_create(NMEM nmem, struct rset_key_control *kcontrol);
+RSET rsnull_create(NMEM nmem, struct rset_key_control *kcontrol, TERMID term);
RSET rsbool_create_and(NMEM nmem, struct rset_key_control *kcontrol,
int scope, RSET rset_l, RSET rset_r);
RSET rset_attr);
RSET rsmulti_or_create(NMEM nmem, struct rset_key_control *kcontrol,
- int scope, int no_rsets, RSET* rsets);
+ int scope, TERMID termid, int no_rsets, RSET* rsets);
RSET rsmulti_and_create(NMEM nmem, struct rset_key_control *kcontrol,
int scope, int no_rsets, RSET* rsets);
RSET rsisams_create(NMEM nmem, struct rset_key_control *kcontrol,
int scope, ISAMS is, ISAM_P pos, TERMID term);
+void rset_visit(RSET rset, int level);
+
YAZ_END_CDECL
#endif
-/* $Id: trunc.c,v 1.57 2005-05-03 09:11:34 adam Exp $
+/* $Id: trunc.c,v 1.58 2005-05-24 11:35:42 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
RSET result;
int trunc_chunk;
+ termid = rset_term_create(term, length, flags, term_type, rset_nmem);
if (no < 1)
- return rsnull_create(rset_nmem, kctrl);
+ return rsnull_create(rset_nmem, kctrl, termid);
- termid = rset_term_create(term, length, flags, term_type, rset_nmem);
if (zi->reg->isams)
{
if (no == 1)
int i;
for (i = 0; i<no; i++)
rsets[i] = rsisamb_create(rset_nmem, kctrl, scope,
- zi->reg->isamb, isam_p[i], termid);
- r = rsmulti_or_create( rset_nmem, kctrl, scope, no, rsets);
+ zi->reg->isamb, isam_p[i],
+ 0 /* termid */);
+ r = rsmulti_or_create(rset_nmem, kctrl, scope,
+ termid /* termid */,
+ no, rsets);
xfree(rsets);
return r;
}
else
{
yaz_log(YLOG_WARN, "Unknown isam set in rset_trunc");
- return rsnull_create(rset_nmem, kctrl);
+ return rsnull_create(rset_nmem, kctrl, 0);
}
trunc_chunk = atoi(res_get_def(zi->res, "truncchunk", "100"));
result = rset_trunc_r(zi, term, length, flags, isam_p, 0, no, trunc_chunk,
-/* $Id: zrpn.c,v 1.191 2005-05-11 12:39:37 adam Exp $
+/* $Id: zrpn.c,v 1.192 2005-05-24 11:35:42 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
if (res != ZEBRA_OK)
return res;
if (num_result_sets == 0)
- *rset = rsnull_create (rset_nmem, kc);
+ *rset = rsnull_create (rset_nmem, kc, 0);
else if (num_result_sets == 1)
*rset = result_sets[0];
else
if (res != ZEBRA_OK)
return res;
if (num_result_sets == 0)
- *rset = rsnull_create (rset_nmem, kc);
+ *rset = rsnull_create (rset_nmem, kc, 0);
else if (num_result_sets == 1)
*rset = result_sets[0];
else
- *rset = rsmulti_or_create(rset_nmem, kc, kc->scope,
+ *rset = rsmulti_or_create(rset_nmem, kc, kc->scope, 0 /* termid */,
num_result_sets, result_sets);
if (!*rset)
return ZEBRA_FAIL;
if (res != ZEBRA_OK)
return res;
if (num_result_sets == 0)
- *rset = rsnull_create (rset_nmem, kc);
+ *rset = rsnull_create (rset_nmem, kc, 0);
else if (num_result_sets == 1)
*rset = result_sets[0];
else
return ZEBRA_FAIL;
}
if (num_result_sets == 0)
- *rset = rsnull_create(rset_nmem, kc);
+ *rset = rsnull_create(rset_nmem, kc, 0);
if (num_result_sets == 1)
*rset = result_sets[0];
else
oid_value attributeSet, NMEM stream,
Z_SortKeySpecList *sort_sequence,
const char *rank_type,
+ NMEM rset_nmem,
RSET *rset,
struct rset_key_control *kc)
{
sks->which = Z_SortKeySpec_null;
sks->u.null = odr_nullval ();
sort_sequence->specs[i] = sks;
- *rset = rsnull_create (NULL, kc);
+ *rset = rsnull_create (rset_nmem, kc, 0);
return ZEBRA_OK;
}
const char *flags = "void";
if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
- return rsnull_create(rset_nmem, kc);
+ return rsnull_create(rset_nmem, kc, 0);
if (ord < 0)
- return rsnull_create(rset_nmem, kc);
+ return rsnull_create(rset_nmem, kc, 0);
if (prefix_len)
term_dict[prefix_len++] = '|';
else
if (sort_flag)
return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
- rank_type, rset, kc);
+ rank_type, rset_nmem, rset, kc);
xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
if (xpath_len >= 0)
{
break;
case Z_Operator_or:
rset = rsmulti_or_create(rset_nmem, kc,
- kc->scope,
+ kc->scope, 0, /* termid */
*num_result_sets, *result_sets);
break;
case Z_Operator_and_not:
zapt->term->which,rset_nmem,
kc, kc->scope);
rset = rsmulti_or_create(rset_nmem, kc,
- 2, kc->scope, rsets);
+ kc->scope, 0 /* termid */,
+ 2, rsets);
}
ptr[j]++;
}
rsets[1] = rset_dup(limit_set);
rset = rsmulti_and_create(rset_nmem, kc,
- kc->scope, 2, rsets);
+ kc->scope,
+ 2, rsets);
}
/* count it */
count_set(rset, &glist[lo].occurrences);
zapt->term->which, rset_nmem,
kc, kc->scope);
rset = rsmulti_or_create(rset_nmem, kc,
- 2, kc->scope, rsets);
+ kc->scope, 0 /* termid */, 2, rsets);
ptr[j]++;
}
-/* $Id: zserver.c,v 1.133 2005-05-12 10:17:07 adam Exp $
+/* $Id: zserver.c,v 1.134 2005-05-24 11:35:42 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
return r;
}
-static void search_terms (ZebraHandle zh, bend_search_rr *r)
+static void search_terms(ZebraHandle zh, bend_search_rr *r)
{
- zint count = 0;
int no_terms;
int i;
int type = Z_Term_general;
struct Z_External *ext;
Z_SearchInfoReport *sr;
- /* get no of terms for result set */
- no_terms = 0; /* zebra_resultSetTerms (zh, r->setname, 0, 0, 0, 0, 0); */
- /* FIXME - Rsets don't know number of terms no more ??? */
+ zebra_result_set_term_no(zh, r->setname, &no_terms);
if (!no_terms)
return;
for (i = 0; i<no_terms; i++)
{
Z_Term *term;
+ zint count;
+ int approx;
char outbuf[1024];
size_t len = sizeof(outbuf);
- /* FIXME - Can we just skip this ??? */
- /*
- zebra_resultSetTerms (zh, r->setname, i,
- &count, &type, outbuf, &len);
- */
+
+ zebra_result_set_term_info(zh, r->setname, i,
+ &count, &approx, outbuf, &len);
+
sr->elements[i] = odr_malloc (r->stream, sizeof(**sr->elements));
sr->elements[i]->subqueryId = 0;
sr->elements[i]->fullQuery = odr_malloc (r->stream,
-/* $Id: zsets.c,v 1.82 2005-05-11 12:39:37 adam Exp $
+/* $Id: zsets.c,v 1.83 2005-05-24 11:35:42 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
int use;
char *term;
};
+
struct zebra_set {
char *name;
RSET rset;
}
}
-ZebraMetaRecord *zebra_meta_records_create_range (ZebraHandle zh,
- const char *name,
- zint start, int num)
+ZebraMetaRecord *zebra_meta_records_create_range(ZebraHandle zh,
+ const char *name,
+ zint start, int num)
{
zint pos_small[10];
zint *pos = pos_small;
return mr;
}
-ZebraMetaRecord *zebra_meta_records_create (ZebraHandle zh, const char *name,
- int num, zint *positions)
+ZebraMetaRecord *zebra_meta_records_create(ZebraHandle zh, const char *name,
+ int num, zint *positions)
{
ZebraSet sset;
ZebraMetaRecord *sr = 0;
}
rset_close (rfd);
yaz_log(log_level_sort, ZINT_FORMAT " keys, " ZINT_FORMAT " sysnos, sort",
- kno, sset->hits);
+ kno, sset->hits);
for (i = 0; i < numTerms; i++)
yaz_log(log_level_sort, "term=\"%s\" type=%s count=" ZINT_FORMAT,
- terms[i]->name, terms[i]->flags, rset_count(terms[i]->rset));
+ terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
*sort_status = Z_SortResponse_success;
return ZEBRA_OK;
}
-RSET resultSetRef (ZebraHandle zh, const char *resultSetId)
+RSET resultSetRef(ZebraHandle zh, const char *resultSetId)
{
ZebraSet s;
return NULL;
}
-ZEBRA_RES resultSetRank (ZebraHandle zh, ZebraSet zebraSet,
- RSET rset, NMEM nmem)
+ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet,
+ RSET rset, NMEM nmem)
{
- zint kno = 0;
struct it_key key;
- RSFD rfd;
TERMID termid;
TERMID *terms;
+ zint kno = 0;
int numTerms = 0;
int n = 0;
int i;
ZebraRankClass rank_class;
- struct rank_control *rc;
struct zset_sort_info *sort_info;
const char *rank_handler_name = res_get_def(zh->res, "rank", "rank-1");
- double cur,tot;
- zint est=-2; /* -2 not done, -1 can't do, >0 actual estimate*/
- zint esthits;
- double ratio;
if (!log_level_set)
loglevels();
sort_info->num_entries = 0;
zebraSet->hits = 0;
rset_getterms(rset, 0, 0, &n);
- yaz_log(YLOG_LOG, "Got %d terms", n);
terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
rset_getterms(rset, terms, n, &numTerms);
- rfd = rset_open (rset, RSETF_READ);
-
- rank_class = zebraRankLookup (zh, rank_handler_name);
+ rank_class = zebraRankLookup(zh, rank_handler_name);
if (!rank_class)
{
yaz_log(YLOG_WARN, "No such rank handler: %s", rank_handler_name);
zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "Cannot find rank handler");
return ZEBRA_FAIL;
}
- rc = rank_class->control;
-
- if (rset_read (rfd, &key, &termid))
+ else
{
- zint psysno = key.mem[0];
- int score;
- void *handle =
- (*rc->begin) (zh->reg, rank_class->class_handle, rset, nmem,
- terms, numTerms);
- (zebraSet->hits)++;
- esthits = atoi(res_get_def(zh->res, "estimatehits","0"));
- if (!esthits)
- est = -1; /* can not do */
- do
- {
- zint this_sys = key.mem[0]; /* FIXME - assumes scope==2 */
- zint seqno = key.mem[key.len-1]; /* FIXME - assumes scope==2 */
- kno++;
- key_logdump_txt(log_level_searchhits,&key," Got hit");
- if (this_sys != psysno)
- {
- score = (*rc->calc) (handle, psysno);
-
- resultSetInsertRank (zh, sort_info, psysno, score, 'A');
- (zebraSet->hits)++;
- psysno = this_sys;
- }
- (*rc->add) (handle, CAST_ZINT_TO_INT(seqno), termid);
-
- if ((est==-2) && (zebraSet->hits==esthits))
- { /* time to estimate the hits */
- rset_pos(rfd,&cur,&tot);
- if (tot>0) {
- ratio = cur/tot;
- est = (zint)(0.5+zebraSet->hits/ratio);
- yaz_log(log_level_searchhits, "Estimating hits (%s) "
- "%0.1f->" ZINT_FORMAT
- "; %0.1f->" ZINT_FORMAT,
- rset->control->desc,
- cur, zebraSet->hits,
- tot, est);
- i = 0; /* round to 3 significant digits */
- while (est>1000) {
- est /= 10;
- i++;
- }
- while (i--)
- est *= 10;
- zebraSet->hits = est;
- }
- }
- }
- while (rset_read (rfd, &key,&termid) && (est<0) );
- score = (*rc->calc)(handle, psysno);
- resultSetInsertRank(zh, sort_info, psysno, score, 'A');
- (*rc->end) (zh->reg, handle);
+ RSFD rfd = rset_open(rset, RSETF_READ);
+ struct rank_control *rc = rank_class->control;
+ double score;
+
+ void *handle =
+ (*rc->begin) (zh->reg, rank_class->class_handle, rset, nmem,
+ terms, numTerms);
+ zint psysno = 0;
+ while (rset_read(rfd, &key, &termid))
+ {
+ zint this_sys = key.mem[0];
+ zint seqno = key.mem[key.len-1];
+ kno++;
+ key_logdump_txt(log_level_searchhits, &key, " Got hit");
+ if (this_sys != psysno)
+ {
+ if (rfd->counted_items >= rset->hits_limit)
+ break;
+ if (psysno)
+ {
+ score = (*rc->calc) (handle, psysno);
+ resultSetInsertRank (zh, sort_info, psysno, score, 'A');
+ }
+ psysno = this_sys;
+ }
+ (*rc->add) (handle, CAST_ZINT_TO_INT(seqno), termid);
+ }
+ if (psysno)
+ {
+ score = (*rc->calc)(handle, psysno);
+ resultSetInsertRank(zh, sort_info, psysno, score, 'A');
+ }
+ (*rc->end) (zh->reg, handle);
+ rset_close (rfd);
}
- rset_close (rfd);
-
+ zebraSet->hits = rset->hits_count;
+
yaz_log(log_level_searchterms, ZINT_FORMAT " keys, "
ZINT_FORMAT " sysnos, rank", kno, zebraSet->hits);
for (i = 0; i < numTerms; i++)
{
yaz_log(log_level_searchterms, "term=\"%s\" type=%s count="
ZINT_FORMAT,
- terms[i]->name, terms[i]->flags, rset_count(terms[i]->rset));
+ terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
}
return ZEBRA_OK;
}
-ZebraRankClass zebraRankLookup (ZebraHandle zh, const char *name)
+ZebraRankClass zebraRankLookup(ZebraHandle zh, const char *name)
{
ZebraRankClass p = zh->reg->rank_classes;
while (p && strcmp (p->control->name, name))
return p;
}
-void zebraRankInstall (struct zebra_register *reg, struct rank_control *ctrl)
+void zebraRankInstall(struct zebra_register *reg, struct rank_control *ctrl)
{
ZebraRankClass p = (ZebraRankClass) xmalloc (sizeof(*p));
p->control = (struct rank_control *) xmalloc (sizeof(*p->control));
reg->rank_classes = p;
}
-void zebraRankDestroy (struct zebra_register *reg)
+void zebraRankDestroy(struct zebra_register *reg)
{
ZebraRankClass p = reg->rank_classes;
while (p)
}
reg->rank_classes = NULL;
}
+
+/* Walks the rset tree depth-first (children before the node itself) and
+ * records, for every rset that carries a term, the term together with its
+ * hit count and approximation flag.  Each of the three output arrays may
+ * be 0, in which case that piece of information is not stored; passing 0
+ * for all of them turns the call into a pure count.
+ * Returns the number of terms found in the subtree rooted at rset. */
+static int trav_rset_for_termids(RSET rset, TERMID *termid_array,
+                                 zint *hits_array, int *approx_array)
+{
+    int found = 0;
+    int child;
+
+    for (child = 0; child < rset->no_children; child++)
+    {
+        /* a child appends after the entries collected so far */
+        TERMID *sub_terms = termid_array ? termid_array + found : 0;
+        zint *sub_hits = hits_array ? hits_array + found : 0;
+        int *sub_approx = approx_array ? approx_array + found : 0;
+
+        found += trav_rset_for_termids(rset->children[child], sub_terms,
+                                       sub_hits, sub_approx);
+    }
+
+    /* rsets without a term contribute nothing of their own */
+    if (!rset->term)
+        return found;
+
+    if (termid_array)
+        termid_array[found] = rset->term;
+    if (hits_array)
+        hits_array[found] = rset->hits_count;
+    if (approx_array)
+        approx_array[found] = rset->hits_approx;
+#if 0
+    yaz_log(YLOG_LOG, "rset=%p term=%s count=" ZINT_FORMAT,
+            rset, rset->term->name, rset->hits_count);
+#endif
+    return found + 1;
+}
+
+/* Reports how many terms the named result set consists of.
+ * *num_terms is always written: 0 when the set does not exist, otherwise
+ * the number of term-carrying rsets in the set's rset tree.
+ * Returns ZEBRA_OK when the set was found and ZEBRA_FAIL when it was not. */
+ZEBRA_RES zebra_result_set_term_no(ZebraHandle zh, const char *setname,
+                                   int *num_terms)
+{
+    ZebraSet found_set = resultSetGet(zh, setname);
+
+    *num_terms = 0;
+    if (!found_set)
+        return ZEBRA_FAIL;
+
+    /* no output arrays: the traversal only counts */
+    *num_terms = trav_rset_for_termids(found_set->rset, 0, 0, 0);
+    return ZEBRA_OK;
+}
+
+/* Fetches information about term number `no` of the named result set.
+ *
+ * count   - if non-null, receives the hit count of the term's rset
+ * approx  - if non-null, receives the approximation flag of that rset
+ * termbuf - if non-null, receives the term name as a NUL-terminated
+ *           string; the name is converted with zh->iconv_from_utf8 when
+ *           that converter is set, otherwise it is copied and truncated
+ *           to fit
+ * termlen - capacity of termbuf on entry (including room for the NUL);
+ *           number of bytes stored, excluding the NUL, on return.  It is
+ *           set to 0 when the conversion fails.
+ *
+ * The term is only written when termbuf and termlen are both non-null
+ * and *termlen is at least 1; otherwise termbuf and *termlen are left
+ * untouched.  This avoids the size_t wrap-around of "*termlen - 1" for a
+ * zero-sized buffer and the dereference of a null termlen.
+ *
+ * Returns ZEBRA_OK if the result set exists and `no` is a valid term
+ * index, ZEBRA_FAIL otherwise. */
+ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname,
+                                     int no, zint *count, int *approx,
+                                     char *termbuf, size_t *termlen)
+{
+    ZebraSet sset = resultSetGet(zh, setname);
+    if (sset)
+    {
+        /* first pass only counts, so the arrays can be sized exactly */
+        int num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
+        if (no >= 0 && no < num_terms)
+        {
+            TERMID *term_array = xmalloc(num_terms * sizeof(*term_array));
+            zint *hits_array = xmalloc(num_terms * sizeof(*hits_array));
+            int *approx_array = xmalloc(num_terms * sizeof(*approx_array));
+
+            trav_rset_for_termids(sset->rset, term_array,
+                                  hits_array, approx_array);
+
+            if (count)
+                *count = hits_array[no];
+            if (approx)
+                *approx = approx_array[no];
+            if (termbuf && termlen && *termlen > 0)
+            {
+                char *inbuf = term_array[no]->name;
+                size_t inleft = strlen(inbuf);
+                /* reserve one byte for the terminating NUL */
+                size_t outleft = *termlen - 1;
+
+                if (zh->iconv_from_utf8 != 0)
+                {
+                    char *outbuf = termbuf;
+                    size_t ret;
+
+                    ret = yaz_iconv(zh->iconv_from_utf8, &inbuf, &inleft,
+                                    &outbuf, &outleft);
+                    if (ret == (size_t)(-1))
+                        *termlen = 0;
+                    else
+                        *termlen = outbuf - termbuf;
+                }
+                else
+                {
+                    if (inleft > outleft)
+                        inleft = outleft;
+                    *termlen = inleft;
+                    memcpy(termbuf, inbuf, *termlen);
+                }
+                termbuf[*termlen] = '\0';
+            }
+
+            xfree(term_array);
+            xfree(hits_array);
+            xfree(approx_array);
+            return ZEBRA_OK;
+        }
+    }
+    return ZEBRA_FAIL;
+}
-/* $Id: rsbetween.c,v 1.38 2005-05-03 09:11:35 adam Exp $
+/* $Id: rsbetween.c,v 1.39 2005-05-24 11:35:43 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
#define ATTRTAG 3
struct rset_between_info {
- RSET andset; /* the multi-and of the above */
TERMID startterm; /* pseudo terms for detecting which one we read from */
TERMID stopterm;
TERMID attrterm;
int scope,
RSET rset_l, RSET rset_m, RSET rset_r, RSET rset_attr)
{
- RSET rnew = rset_create_base(&control, nmem, kcontrol, scope,0);
+ RSET rnew = rset_create_base(&control, nmem, kcontrol, scope, 0, 0, 0);
struct rset_between_info *info=
(struct rset_between_info *) nmem_malloc(rnew->nmem,sizeof(*info));
RSET rsetarray[4];
rsetarray[ATTRTAG] = rset_attr;
/* make sure we have decent terms for all rsets. Create dummies if needed*/
- checkterm( rsetarray[STARTTAG], "(start)",nmem);
- checkterm( rsetarray[STOPTAG], "(start)",nmem);
+ checkterm(rsetarray[STARTTAG], "(start)", nmem);
+ checkterm(rsetarray[STOPTAG], "(start)", nmem);
info->startterm = rsetarray[STARTTAG]->term;
info->stopterm = rsetarray[STOPTAG]->term;
if (rset_attr)
{
- checkterm( rsetarray[ATTRTAG], "(start)",nmem);
+ checkterm(rsetarray[ATTRTAG], "(start)", nmem);
info->attrterm = rsetarray[ATTRTAG]->term;
n = 4;
}
info->attrterm = NULL;
n = 3;
}
- info->andset = rsmulti_and_create( nmem, kcontrol, scope, n, rsetarray);
+ rnew->no_children = 1;
+ rnew->children = nmem_malloc(rnew->nmem, sizeof(RSET *));
+ rnew->children[0] = rsmulti_and_create(nmem, kcontrol,
+ scope, n, rsetarray);
rnew->priv = info;
- yaz_log(log_level,"create rset at %p",rnew);
+ yaz_log(log_level, "create rset at %p", rnew);
return rnew;
}
-
static void r_delete(RSET ct)
{
- struct rset_between_info *info = (struct rset_between_info *) ct->priv;
- yaz_log(log_level,"delete rset at %p",ct);
- rset_delete(info->andset);
}
static RSFD r_open(RSET ct, int flag)
{
- struct rset_between_info *info = (struct rset_between_info *) ct->priv;
RSFD rfd;
struct rset_between_rfd *p;
else {
p = (struct rset_between_rfd *) nmem_malloc(ct->nmem, (sizeof(*p)));
rfd->priv = p;
- p->recbuf = nmem_malloc(ct->nmem, (ct->keycontrol->key_size));
- p->startbuf = nmem_malloc(ct->nmem, (ct->keycontrol->key_size));
- p->attrbuf = nmem_malloc(ct->nmem, (ct->keycontrol->key_size));
+ p->recbuf = nmem_malloc(ct->nmem, ct->keycontrol->key_size);
+ p->startbuf = nmem_malloc(ct->nmem, ct->keycontrol->key_size);
+ p->attrbuf = nmem_malloc(ct->nmem, ct->keycontrol->key_size);
}
- p->andrfd = rset_open(info->andset, RSETF_READ);
- p->hits=-1;
+ p->andrfd = rset_open(ct->children[0], RSETF_READ);
+ p->hits = -1;
p->depth = 0;
p->attrdepth = 0;
p->attrbufok = 0;
p->startbufok = 0;
- yaz_log(log_level,"open rset=%p rfd=%p", ct, rfd);
+ yaz_log(log_level, "open rset=%p rfd=%p", ct, rfd);
return rfd;
}
struct rset_between_rfd *p=(struct rset_between_rfd *)rfd->priv;
yaz_log(log_level,"close rfd=%p", rfd);
rset_close(p->andrfd);
- rfd_delete_base(rfd);
}
-
-
static int r_forward(RSFD rfd, void *buf,
TERMID *term, const void *untilbuf)
{
return rc;
}
-
-
static void checkattr(RSFD rfd)
{
struct rset_between_info *info =(struct rset_between_info *)
}
}
-
static int r_read(RSFD rfd, void *buf, TERMID *term)
{
struct rset_between_info *info =
const struct rset_key_control *kctrl = rfd->rset->keycontrol;
int cmp;
TERMID dummyterm = 0;
- yaz_log(log_level,"== read: term=%p",term);
+ yaz_log(log_level, "== read: term=%p",term);
if (!term)
- term=&dummyterm;
- while ( rset_read(p->andrfd,buf,term) )
+ term = &dummyterm;
+ while (rset_read(p->andrfd, buf, term))
{
yaz_log(log_level,"read loop term=%p d=%d ad=%d",
- *term,p->depth, p->attrdepth);
+ *term, p->depth, p->attrdepth);
if (p->hits<0)
{/* first time? */
- memcpy(p->recbuf,buf,kctrl->key_size);
+ memcpy(p->recbuf, buf, kctrl->key_size);
p->hits = 0;
cmp = rfd->rset->scope; /* force newrecord */
}
else {
- cmp=(kctrl->cmp)(buf,p->recbuf);
- yaz_log(log_level, "cmp=%d",cmp);
+ cmp = (kctrl->cmp)(buf, p->recbuf);
+ yaz_log(log_level, "cmp=%d", cmp);
}
if (cmp>=rfd->rset->scope)
{
- yaz_log(log_level,"new record");
+ yaz_log(log_level, "new record");
p->depth = 0;
p->attrdepth = 0;
- memcpy(p->recbuf,buf,kctrl->key_size);
+ memcpy(p->recbuf, buf, kctrl->key_size);
}
if (*term)
- yaz_log(log_level," term: '%s'", (*term)->name);
+ yaz_log(log_level, " term: '%s'", (*term)->name);
if (*term==info->startterm)
{
p->depth++;
- yaz_log(log_level,"read start tag. d=%d",p->depth);
- memcpy(p->startbuf,buf,kctrl->key_size);
+ yaz_log(log_level, "read start tag. d=%d", p->depth);
+ memcpy(p->startbuf, buf, kctrl->key_size);
p->startbufok = 1;
checkattr(rfd); /* in case we already saw the attr here */
}
if (p->depth == p->attrdepth)
p->attrdepth = 0; /* ending the tag with attr match */
p->depth--;
- yaz_log(log_level,"read end tag. d=%d ad=%d",p->depth, p->attrdepth);
+ yaz_log(log_level,"read end tag. d=%d ad=%d", p->depth,
+ p->attrdepth);
}
else if (*term==info->attrterm)
{
yaz_log(log_level,"read attr");
- memcpy(p->attrbuf,buf,kctrl->key_size);
+ memcpy(p->attrbuf, buf, kctrl->key_size);
p->attrbufok = 1;
checkattr(rfd); /* in case the start tag came first */
}
{
p->hits++;
yaz_log(log_level,"got a hit h="ZINT_FORMAT" d=%d ad=%d",
- p->hits,p->depth,p->attrdepth);
+ p->hits, p->depth, p->attrdepth);
return 1; /* we have everything in place already! */
} else
yaz_log(log_level, "Ignoring hit. h="ZINT_FORMAT" d=%d ad=%d",
- p->hits,p->depth,p->attrdepth);
+ p->hits, p->depth, p->attrdepth);
}
} /* while read */
static void r_pos(RSFD rfd, double *current, double *total)
{
struct rset_between_rfd *p=(struct rset_between_rfd *)rfd->priv;
- rset_pos(p->andrfd,current, total);
- yaz_log(log_level,"pos: %0.1f/%0.1f ", *current, *total);
+ rset_pos(p->andrfd, current, total);
+ yaz_log(log_level, "pos: %0.1f/%0.1f ", *current, *total);
}
static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm)
{
- struct rset_between_info *info = (struct rset_between_info *) ct->priv;
- rset_getterms(info->andset, terms, maxterms, curterm);
+ rset_getterms(ct->children[0], terms, maxterms, curterm);
}
-/* $Id: rsbool.c,v 1.55 2005-05-03 09:11:36 adam Exp $
+/* $Id: rsbool.c,v 1.56 2005-05-24 11:35:43 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
struct rset_key_control *kcontrol,
int scope, RSET rset_l, RSET rset_r)
{
- RSET rnew = rset_create_base(ctrl, nmem, kcontrol, scope, 0);
+ RSET children[2], rnew;
+
+ children[0] = rset_l;
+ children[1] = rset_r;
+ rnew = rset_create_base(ctrl, nmem, kcontrol, scope, 0, 2, children);
struct rset_private *info;
info = (struct rset_private *) nmem_malloc(rnew->nmem, sizeof(*info));
info->rset_l = rset_l;
static void r_delete(RSET ct)
{
- struct rset_private *info = (struct rset_private *) ct->priv;
- rset_delete (info->rset_l);
- rset_delete (info->rset_r);
}
static RSFD r_open(RSET ct, int flag)
rset_close (prfd->rfd_l);
rset_close (prfd->rfd_r);
- rfd_delete_base(rfd);
}
static int r_forward(RSFD rfd, void *buf, TERMID *term,
-/* $Id: rset.c,v 1.45 2005-05-03 09:11:36 adam Exp $
+/* $Id: rset.c,v 1.46 2005-05-24 11:35:43 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
else
{
rnew = nmem_malloc(rs->nmem, sizeof(*rnew));
+ rnew->counted_buf = nmem_malloc(rs->nmem, rs->keycontrol->key_size);
rnew->priv = 0;
rnew->rset = rs;
yaz_log(log_level, "rfd_create_base (new): rfd=%p rs=%p fl=%p priv=%p",
}
rnew->next = rs->use_list;
rs->use_list = rnew;
+ rnew->counted_items = 0;
return rnew;
}
*
* puts an rfd into the freelist of the rset. Only when the rset gets
* deleted, will all the nmem disappear */
-void rfd_delete_base(RSFD rfd)
+void rset_close(RSFD rfd)
{
RSFD *pfd;
RSET rs = rfd->rset;
+
+ if (rs->hits_count == 0)
+ {
+ TERMID termid;
+ char buf[100];
+ while(rfd->counted_items < rs->hits_limit
+ && rset_default_read(rfd, buf, &termid))
+ ;
+
+ rs->hits_count = rfd->counted_items;
+ rs->hits_approx = 0;
+ if (rs->hits_count >= rs->hits_limit)
+ {
+ double cur, tot;
+ zint est;
+ rset_pos(rfd, &cur, &tot);
+ if (tot > 0) {
+ int i;
+ double ratio = cur/tot;
+ est = (zint)(0.5 + rs->hits_count / ratio);
+ yaz_log(log_level, "Estimating hits (%s) "
+ "%0.1f->" ZINT_FORMAT
+ "; %0.1f->" ZINT_FORMAT,
+ rs->control->desc,
+ cur, rs->hits_count,
+ tot, est);
+ i = 0; /* round to significant digits */
+ while (est > rs->hits_round) {
+ est /= 10;
+ i++;
+ }
+ while (i--)
+ est *= 10;
+ rs->hits_count = est;
+ rs->hits_approx = 1;
+ }
+ }
+ yaz_log(log_level, "rset_close p=%p count=" ZINT_FORMAT, rs,
+ rs->hits_count);
+ }
+ (*rs->control->f_close)(rfd);
+
yaz_log(log_level, "rfd_delete_base: rfd=%p rs=%p priv=%p fl=%p",
rfd, rs, rfd->priv, rs->free_list);
for (pfd = &rs->use_list; *pfd; pfd = &(*pfd)->next)
RSET rset_create_base(const struct rset_control *sel,
NMEM nmem, struct rset_key_control *kcontrol,
- int scope, TERMID term)
+ int scope, TERMID term,
+ int no_children, RSET *children)
{
- RSET rnew;
+ RSET rset;
NMEM M;
- /* assert(nmem); */ /* can not yet be used, api/t4 fails */
+ assert(nmem); /* can not yet be used, api/t4 fails */
if (!log_level_initialized)
{
log_level = yaz_log_module_level("rset");
M = nmem;
else
M = nmem_create();
- rnew = (RSET) nmem_malloc(M, sizeof(*rnew));
- yaz_log(log_level, "rs_create(%s) rs=%p (nm=%p)", sel->desc, rnew, nmem);
- rnew->nmem = M;
+ rset = (RSET) nmem_malloc(M, sizeof(*rset));
+ yaz_log(log_level, "rs_create(%s) rs=%p (nm=%p)", sel->desc, rset, nmem);
+ rset->nmem = M;
if (nmem)
- rnew->my_nmem = 0;
+ rset->my_nmem = 0;
else
- rnew->my_nmem = 1;
- rnew->control = sel;
- rnew->count = 1; /* refcount! */
- rnew->priv = 0;
- rnew->free_list = NULL;
- rnew->use_list = NULL;
- rnew->keycontrol = kcontrol;
+ rset->my_nmem = 1;
+ rset->control = sel;
+ rset->refcount = 1;
+ rset->priv = 0;
+ rset->free_list = NULL;
+ rset->use_list = NULL;
+ rset->hits_count = 0;
+ rset->hits_limit = 1000;
+ rset->hits_round = 1000;
+ rset->keycontrol = kcontrol;
(*kcontrol->inc)(kcontrol);
- rnew->scope = scope;
- rnew->term = term;
+ rset->scope = scope;
+ rset->term = term;
if (term)
- term->rset = rnew;
- return rnew;
+ term->rset = rset;
+
+ rset->no_children = no_children;
+ rset->children = 0;
+ if (no_children)
+ {
+ rset->children = (RSET*)
+ nmem_malloc(rset->nmem, no_children*sizeof(RSET *));
+ memcpy(rset->children, children, no_children*sizeof(RSET *));
+ }
+ return rset;
}
-void rset_delete (RSET rs)
+void rset_delete(RSET rs)
{
- (rs->count)--;
- yaz_log(log_level, "rs_delete(%s), rs=%p, count=%d",
- rs->control->desc, rs, rs->count);
- if (!rs->count)
+ (rs->refcount)--;
+ yaz_log(log_level, "rs_delete(%s), rs=%p, refcount=%d",
+ rs->control->desc, rs, rs->refcount);
+ if (!rs->refcount)
{
+ int i;
if (rs->use_list)
yaz_log(YLOG_WARN, "rs_delete(%s) still has RFDs in use",
rs->control->desc);
+ for (i = 0; i<rs->no_children; i++)
+ rset_delete(rs->children[i]);
(*rs->control->f_delete)(rs);
(*rs->keycontrol->dec)(rs->keycontrol);
if (rs->my_nmem)
RSET rset_dup (RSET rs)
{
- (rs->count)++;
- yaz_log(log_level, "rs_dup(%s), rs=%p, count=%d",
- rs->control->desc, rs, rs->count);
+ (rs->refcount)++;
+ yaz_log(log_level, "rs_dup(%s), rs=%p, refcount=%d",
+ rs->control->desc, rs, rs->refcount);
(*rs->keycontrol->inc)(rs->keycontrol);
return rs;
}
-int rset_default_forward(RSFD rfd, void *buf, TERMID *term,
- const void *untilbuf)
-{
- int more = 1;
- int cmp = rfd->rset->scope;
- if (log_level)
- {
- yaz_log (log_level, "rset_default_forward starting '%s' (ct=%p rfd=%p)",
- rfd->rset->control->desc, rfd->rset, rfd);
- /* key_logdump(log_level, untilbuf); */
- }
- while (cmp>=rfd->rset->scope && more)
- {
- if (log_level) /* time-critical, check first */
- yaz_log(log_level, "rset_default_forward looping m=%d c=%d",
- more, cmp);
- more = rset_read(rfd, buf, term);
- if (more)
- cmp = (rfd->rset->keycontrol->cmp)(untilbuf, buf);
- }
- if (log_level)
- yaz_log (log_level, "rset_default_forward exiting m=%d c=%d",
- more, cmp);
-
- return more;
-}
-
/**
* rset_count uses rset_pos to get the total and returns that.
* This is ok for rsisamb/c/s, and for some other rsets, but in case of
return (zint) tot;
}
-
/** rset_get_no_terms is a getterms function for those that don't have any */
void rset_get_no_terms(RSET ct, TERMID *terms, int maxterms, int *curterm)
{
}
/* rset_get_one_term gets that one term from an rset. Used by rsisamX */
-void rset_get_one_term(RSET ct,TERMID *terms,int maxterms,int *curterm)
+void rset_get_one_term(RSET ct, TERMID *terms, int maxterms, int *curterm)
{
if (ct->term)
{
}
-TERMID rset_term_create (const char *name, int length, const char *flags,
- int type, NMEM nmem)
+TERMID rset_term_create(const char *name, int length, const char *flags,
+ int type, NMEM nmem)
{
TERMID t;
t->rset = 0;
return t;
}
+
+/* Generic read: delegates to the rset type's own f_read and keeps the
+ * per-rfd hit counter up to date.  A key counts as a new item when it is
+ * the first one seen on this rfd, or when the key compare against the
+ * previously counted key yields a value of at least the rset's scope.
+ * Returns whatever f_read returned. */
+int rset_default_read(RSFD rfd, void *buf, TERMID *term)
+{
+    RSET rs = rfd->rset;
+    int got = (*rs->control->f_read)(rfd, buf, term);
+    int is_new_item;
+
+    if (got <= 0)
+        return got;
+
+    is_new_item = rfd->counted_items == 0
+        || (rs->keycontrol->cmp)(buf, rfd->counted_buf) >= rs->scope;
+    if (is_new_item)
+    {
+        /* remember the key so the next read is compared against it */
+        memcpy(rfd->counted_buf, buf, rs->keycontrol->key_size);
+        rfd->counted_items++;
+    }
+    return got;
+}
+
+/* Generic forward: positions rfd at the first key that is no longer
+ * "before" untilbuf at the rset's scope level.
+ *
+ * Once counted_items has reached hits_limit and the rset type provides
+ * its own f_forward, that function is used directly.  Before that point
+ * stepping is done with rset_read, so every key passes through the hit
+ * counting done by the read path.
+ *
+ * The loop stops as soon as the compare of untilbuf against the key just
+ * read drops below rset->scope.  The threshold is taken from the rset
+ * rather than hard-coded, so rsets whose scope is not 2 are forwarded
+ * correctly as well.
+ *
+ * Returns the result of the last read (greater than 0 when buf holds a
+ * key), or the result of the type-specific f_forward when delegated. */
+int rset_default_forward(RSFD rfd, void *buf, TERMID *term,
+                         const void *untilbuf)
+{
+    RSET rset = rfd->rset;
+    int more;
+    int cmp = rset->scope; /* reported as-is if nothing could be read */
+
+    if (rset->control->f_forward &&
+        rfd->counted_items >= rset->hits_limit)
+    {
+        /* a control block pointing back here would recurse forever */
+        assert (rset->control->f_forward != rset_default_forward);
+        return rset->control->f_forward(rfd, buf, term, untilbuf);
+    }
+
+    while ((more = rset_read(rfd, buf, term)) > 0)
+    {
+        cmp = (rset->keycontrol->cmp)(untilbuf, buf);
+        if (cmp < rset->scope)
+            break;
+    }
+    if (log_level)
+        yaz_log (log_level, "rset_default_forward exiting m=%d c=%d",
+                 more, cmp);
+
+    return more;
+}
+
+/* Logs the hit count of rset and, recursively, of all its children.
+ * Each line is indented by `level` columns; the count is prefixed with
+ * '~' when the rset's hits_approx flag is set and with '=' otherwise. */
+void rset_visit(RSET rset, int level)
+{
+    int child;
+    int marker = rset->hits_approx ? '~' : '=';
+
+    yaz_log(YLOG_LOG, "%*s%c " ZINT_FORMAT, level, "", marker,
+            rset->hits_count);
+
+    for (child = 0; child < rset->no_children; child++)
+        rset_visit(rset->children[child], level + 1);
+}
+
-/* $Id: rsisamb.c,v 1.33 2005-05-03 09:11:36 adam Exp $
+/* $Id: rsisamb.c,v 1.34 2005-05-24 11:35:43 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
{
RSET rnew = rset_create_base(
kcontrol->filter_func ? &control_filter : &control,
- nmem, kcontrol, scope, term);
+ nmem, kcontrol, scope, term, 0, 0);
struct rset_private *info;
if (!log_level_initialized)
{
{
struct rfd_private *ptinfo = (struct rfd_private *)(rfd->priv);
isamb_pp_close (ptinfo->pt);
- rfd_delete_base(rfd);
yaz_log(log_level, "rsisamb_close");
}
-/* $Id: rsisamc.c,v 1.39 2005-05-03 09:11:36 adam Exp $
+/* $Id: rsisamc.c,v 1.40 2005-05-24 11:35:43 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
rset_get_one_term,
r_open,
r_close,
- rset_default_forward,
+ 0, /* no forward */
r_pos,
r_read,
r_write,
int scope,
ISAMC is, ISAM_P pos, TERMID term)
{
- RSET rnew = rset_create_base(&control, nmem, kcontrol, scope,term);
+ RSET rnew = rset_create_base(&control, nmem, kcontrol, scope, term, 0, 0);
struct rset_isamc_info *info;
if (!log_level_initialized)
{
log_level = yaz_log_module_level("rsisamc");
log_level_initialized = 1;
}
- info = (struct rset_isamc_info *) nmem_malloc(rnew->nmem,sizeof(*info));
+ info = (struct rset_isamc_info *) nmem_malloc(rnew->nmem, sizeof(*info));
info->is = is;
info->pos = pos;
rnew->priv = info;
struct rset_pp_info *p = (struct rset_pp_info *)(rfd->priv);
isamc_pp_close(p->pt);
- rfd_delete_base(rfd);
}
-/* $Id: rsisams.c,v 1.22 2005-05-03 09:11:36 adam Exp $
+/* $Id: rsisams.c,v 1.23 2005-05-24 11:35:43 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
rset_get_one_term,
r_open,
r_close,
- rset_default_forward,
+ 0, /* no forward */
r_pos,
r_read,
r_write,
int scope,
ISAMS is, ISAM_P pos, TERMID term)
{
- RSET rnew = rset_create_base(&control, nmem, kcontrol, scope, term);
+ RSET rnew = rset_create_base(&control, nmem, kcontrol, scope, term, 0, 0);
struct rset_private *info;
info = (struct rset_private *) nmem_malloc(rnew->nmem,sizeof(*info));
rnew->priv = info;
static void r_delete (RSET ct)
{
yaz_log (YLOG_DEBUG, "rsisams_delete");
- rset_delete(ct);
}
-
RSFD r_open (RSET ct, int flag)
{
struct rset_private *info = (struct rset_private *) ct->priv;
struct rfd_private *ptinfo = (struct rfd_private *)(rfd->priv);
isams_pp_close (ptinfo->pt);
- rfd_delete_base(rfd);
}
-/* $Id: rsmultiandor.c,v 1.18 2005-05-18 11:47:50 adam Exp $
+/* $Id: rsmultiandor.c,v 1.19 2005-05-24 11:35:43 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
struct rset_private {
- int no_rsets;
- RSET *rsets;
+ int dummy;
};
static RSET rsmulti_andor_create(NMEM nmem,
struct rset_key_control *kcontrol,
- int scope, int no_rsets, RSET* rsets,
+ int scope, TERMID termid,
+ int no_rsets, RSET* rsets,
const struct rset_control *ctrl)
{
- RSET rnew = rset_create_base(ctrl, nmem, kcontrol, scope, 0);
+ RSET rnew = rset_create_base(ctrl, nmem, kcontrol, scope, termid,
+ no_rsets, rsets);
struct rset_private *info;
if (!log_level_initialized)
{
log_level_initialized = 1;
}
yaz_log(log_level, "rsmultiand_andor_create scope=%d", scope);
- info = (struct rset_private *) nmem_malloc(rnew->nmem,sizeof(*info));
- info->no_rsets = no_rsets;
- info->rsets = (RSET*)nmem_malloc(rnew->nmem, no_rsets*sizeof(*rsets));
- memcpy(info->rsets,rsets,no_rsets*sizeof(*rsets));
+ info = (struct rset_private *) nmem_malloc(rnew->nmem, sizeof(*info));
rnew->priv = info;
return rnew;
}
RSET rsmulti_or_create(NMEM nmem, struct rset_key_control *kcontrol,
- int scope, int no_rsets, RSET* rsets)
+ int scope, TERMID termid, int no_rsets, RSET* rsets)
{
- return rsmulti_andor_create(nmem, kcontrol, scope,
+ return rsmulti_andor_create(nmem, kcontrol, scope, termid,
no_rsets, rsets, &control_or);
}
RSET rsmulti_and_create(NMEM nmem, struct rset_key_control *kcontrol,
int scope, int no_rsets, RSET* rsets)
{
- return rsmulti_andor_create(nmem, kcontrol, scope,
+ return rsmulti_andor_create(nmem, kcontrol, scope, 0,
no_rsets, rsets, &control_and);
}
static void r_delete (RSET ct)
{
- struct rset_private *info = (struct rset_private *) ct->priv;
- int i;
- for(i = 0; i<info->no_rsets; i++)
- rset_delete(info->rsets[i]);
}
-
/* Opening and closing fd's on them *********************/
static RSFD r_open_andor (RSET ct, int flag, int is_and)
{
RSFD rfd;
struct rfd_private *p;
- struct rset_private *info = (struct rset_private *) ct->priv;
const struct rset_key_control *kctrl = ct->keycontrol;
int i;
p->h = 0;
p->tailbits = 0;
if (is_and)
- p->tailbits = nmem_malloc(ct->nmem, info->no_rsets*sizeof(char) );
+ p->tailbits = nmem_malloc(ct->nmem, ct->no_children*sizeof(char) );
else
- p->h = heap_create( ct->nmem, info->no_rsets, kctrl);
- p->items=(struct heap_item *) nmem_malloc(ct->nmem,
- info->no_rsets*sizeof(*p->items));
- for (i = 0; i<info->no_rsets; i++)
+ p->h = heap_create( ct->nmem, ct->no_children, kctrl);
+ p->items = (struct heap_item *)
+ nmem_malloc(ct->nmem, ct->no_children*sizeof(*p->items));
+ for (i = 0; i<ct->no_children; i++)
{
- p->items[i].rset = info->rsets[i];
+ p->items[i].rset = ct->children[i];
p->items[i].buf = nmem_malloc(ct->nmem, kctrl->key_size);
}
}
p->tailcount = 0;
if (is_and)
{ /* read the array and sort it */
- for (i = 0; i<info->no_rsets; i++){
- p->items[i].fd = rset_open(info->rsets[i],RSETF_READ);
+ for (i = 0; i<ct->no_children; i++){
+ p->items[i].fd = rset_open(ct->children[i], RSETF_READ);
if (!rset_read(p->items[i].fd, p->items[i].buf, &p->items[i].term))
p->eof = 1;
p->tailbits[i] = 0;
}
- qsort(p->items, info->no_rsets, sizeof(p->items[0]), compare_ands);
+ qsort(p->items, ct->no_children, sizeof(p->items[0]), compare_ands);
} else
{ /* fill the heap for ORing */
- for (i = 0; i<info->no_rsets; i++){
- p->items[i].fd = rset_open(info->rsets[i],RSETF_READ);
+ for (i = 0; i<ct->no_children; i++){
+ p->items[i].fd = rset_open(ct->children[i],RSETF_READ);
if ( rset_read(p->items[i].fd, p->items[i].buf, &p->items[i].term))
heap_insert(p->h, &(p->items[i]));
}
static void r_close (RSFD rfd)
{
- struct rset_private *info=
- (struct rset_private *)(rfd->rset->priv);
struct rfd_private *p=(struct rfd_private *)(rfd->priv);
int i;
if (p->h)
heap_destroy (p->h);
- for (i = 0; i<info->no_rsets; i++)
+ for (i = 0; i<rfd->rset->no_children; i++)
if (p->items[i].fd)
rset_close(p->items[i].fd);
- rfd_delete_base(rfd);
}
static int r_forward_or(RSFD rfd, void *buf,
- TERMID *term,const void *untilbuf)
+ TERMID *term, const void *untilbuf)
{ /* while heap head behind untilbuf, forward it and rebalance heap */
struct rfd_private *p = rfd->priv;
const struct rset_key_control *kctrl = rfd->rset->keycontrol;
}
}
- return r_read_or(rfd,buf,term);
+ return r_read_or(rfd, buf, term);
}
static int r_read_or (RSFD rfd, void *buf, TERMID *term)
{
+ RSET rset = rfd->rset;
struct rfd_private *mrfd = rfd->priv;
- const struct rset_key_control *kctrl = rfd->rset->keycontrol;
+ const struct rset_key_control *kctrl = rset->keycontrol;
struct heap_item *it;
int rdres;
if (heap_empty(mrfd->h))
return 0;
it = mrfd->h->heap[1];
- memcpy(buf,it->buf, kctrl->key_size);
+ memcpy(buf, it->buf, kctrl->key_size);
if (term)
- *term = it->term;
+ {
+ if (rset->term)
+ *term = rset->term;
+ else
+ *term = it->term;
+ assert(*term);
+ }
(mrfd->hits)++;
rdres = rset_read(it->fd, it->buf, &it->term);
if ( rdres )
/* value. Mark all as being in the tail. Read next from that */
/* item, and if not in the same record, clear its tail bit */
struct rfd_private *p = rfd->priv;
- const struct rset_key_control *kctrl = rfd->rset->keycontrol;
- struct rset_private *info = rfd->rset->priv;
+ RSET ct = rfd->rset;
+ const struct rset_key_control *kctrl = ct->keycontrol;
int i, mintail;
int cmp;
if (p->tailcount)
{ /* we are tailing, find lowest tail and return it */
mintail = 0;
- while ((mintail<info->no_rsets) && !p->tailbits[mintail])
+ while ((mintail<ct->no_children) && !p->tailbits[mintail])
mintail++; /* first tail */
- for (i = mintail+1; i<info->no_rsets; i++)
+ for (i = mintail+1; i<ct->no_children; i++)
{
if (p->tailbits[i])
{
&p->items[mintail].term))
{
p->eof = 1; /* game over, once tails have been returned */
- p->tailbits[mintail]=0;
+ p->tailbits[mintail] = 0;
(p->tailcount)--;
(p->hits)++;
return 1;
}
/* still a tail? */
- cmp=(*kctrl->cmp)(p->items[mintail].buf,buf);
+ cmp = (*kctrl->cmp)(p->items[mintail].buf,buf);
if (cmp >= rfd->rset->scope){
- p->tailbits[mintail]=0;
+ p->tailbits[mintail] = 0;
(p->tailcount)--;
}
(p->hits)++;
if (p->eof)
return 0; /* nothing more to see */
i = 1; /* assume items[0] is highest up */
- while (i<info->no_rsets) {
- cmp=(*kctrl->cmp)(p->items[0].buf,p->items[i].buf);
- if (cmp<=-rfd->rset->scope) { /* [0] was behind, forward it */
+ while (i<ct->no_children) {
+ cmp = (*kctrl->cmp)(p->items[0].buf, p->items[i].buf);
+ if (cmp <= -rfd->rset->scope) { /* [0] was behind, forward it */
if (!rset_forward(p->items[0].fd, p->items[0].buf,
&p->items[0].term, p->items[i].buf))
{
/* if we get this far, all rsets are now within +- scope of [0] */
/* ergo, we have a hit. Mark them all as tailing, and let the */
/* upper 'if' return the hits in right order */
- for (i = 0; i<info->no_rsets; i++)
+ for (i = 0; i<ct->no_children; i++)
p->tailbits[i] = 1;
- p->tailcount = info->no_rsets;
+ p->tailcount = ct->no_children;
} /* while 1 */
}
const void *untilbuf)
{
struct rfd_private *p = rfd->priv;
- const struct rset_key_control *kctrl = rfd->rset->keycontrol;
- struct rset_private *info = rfd->rset->priv;
+ RSET ct = rfd->rset;
+ const struct rset_key_control *kctrl = ct->keycontrol;
int i;
int cmp;
int killtail = 0;
- for (i = 0; i<info->no_rsets; i++)
+ for (i = 0; i<ct->no_children; i++)
{
cmp = (*kctrl->cmp)(p->items[i].buf,untilbuf);
if (cmp <= -rfd->rset->scope)
}
if (killtail)
{
- for (i = 0; i<info->no_rsets; i++)
+ for (i = 0; i<ct->no_children; i++)
p->tailbits[i] = 0;
p->tailcount = 0;
}
static void r_pos (RSFD rfd, double *current, double *total)
{
- struct rset_private *info =
- (struct rset_private *)(rfd->rset->priv);
+ RSET ct = rfd->rset;
struct rfd_private *mrfd =
(struct rfd_private *)(rfd->priv);
double cur, tot;
double scur = 0.0, stot = 0.0;
int i;
- for (i = 0; i<info->no_rsets; i++){
+ for (i = 0; i<ct->no_children; i++){
rset_pos(mrfd->items[i].fd, &cur, &tot);
yaz_log(log_level, "r_pos: %d %0.1f %0.1f", i, cur,tot);
scur += cur;
}
static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm)
- /* Special case: Some multi-ors have all terms pointing to the same */
- /* term. We do not want to duplicate those. Other multiors (and ands) */
- /* have different terms under them. Those we want. */
{
- struct rset_private *info =
- (struct rset_private *) ct->priv;
- int firstterm= *curterm;
- int i;
- for (i = 0; i<info->no_rsets; i++)
+ if (ct->term)
+ rset_get_one_term(ct, terms, maxterms, curterm);
+ else
{
- rset_getterms(info->rsets[i], terms, maxterms, curterm);
- if ( ( *curterm > firstterm+1 ) &&
- ( *curterm <= maxterms ) &&
- ( terms[(*curterm)-1] == terms[firstterm] )
- )
- (*curterm)--; /* forget the term, seen that before */
+ /* Special case: Some multi-ors have all terms pointing to the same
+ term. We do not want to duplicate those. Other multiors (and ands)
+ have different terms under them. Those we want.
+ */
+ int firstterm= *curterm;
+ int i;
+
+ for (i = 0; i<ct->no_children; i++)
+ {
+ rset_getterms(ct->children[i], terms, maxterms, curterm);
+ if ( ( *curterm > firstterm+1 ) &&
+ ( *curterm <= maxterms ) &&
+ ( terms[(*curterm)-1] == terms[firstterm] )
+ )
+ (*curterm)--; /* forget the term, seen that before */
+ }
}
}
-/* $Id: rsnull.c,v 1.34 2005-05-03 09:11:36 adam Exp $
+/* $Id: rsnull.c,v 1.35 2005-05-24 11:35:43 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
rset_get_no_terms,
r_open,
r_close,
- rset_default_forward,
+ 0, /* no forward */
r_pos,
r_read,
r_write,
};
-RSET rsnull_create(NMEM nmem, struct rset_key_control *kcontrol )
+RSET rsnull_create(NMEM nmem, struct rset_key_control *kcontrol,
+ TERMID term)
{
- RSET rnew = rset_create_base(&control, nmem, kcontrol, 0, 0);
+ RSET rnew = rset_create_base(&control, nmem, kcontrol, 0, term, 0, 0);
rnew->priv = 0;
return rnew;
}
static void r_close(RSFD rfd)
{
- rfd_delete_base(rfd);
}
static void r_delete(RSET ct)
assert(rfd);
assert(current);
assert(total);
- *total=0;
- *current=0;
+ *total = 0;
+ *current = 0;
}
static int r_read(RSFD rfd, void *buf, TERMID *term)
static int r_write(RSFD rfd, const void *buf)
{
- yaz_log (YLOG_FATAL, "NULL set type is read-only");
+ yaz_log(YLOG_FATAL, "NULL set type is read-only");
return -1;
}
-/* $Id: rsprox.c,v 1.28 2005-05-03 09:11:36 adam Exp $
+/* $Id: rsprox.c,v 1.29 2005-05-24 11:35:43 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
};
struct rset_prox_info {
- RSET *rset; /* array of 'child' rsets */
- int rset_no; /* how many of them */
int ordered;
int exclusion;
int relation;
int ordered, int exclusion,
int relation, int distance)
{
- RSET rnew = rset_create_base(&control, nmem, kcontrol, scope,0);
+ RSET rnew = rset_create_base(&control, nmem, kcontrol, scope, 0,
+ rset_no, rset);
struct rset_prox_info *info;
info = (struct rset_prox_info *) nmem_malloc(rnew->nmem,sizeof(*info));
- info->rset = nmem_malloc(rnew->nmem,rset_no * sizeof(*info->rset));
- memcpy(info->rset, rset,
- rset_no * sizeof(*info->rset));
- info->rset_no = rset_no;
info->ordered = ordered;
info->exclusion = exclusion;
info->relation = relation;
static void r_delete (RSET ct)
{
- struct rset_prox_info *info = (struct rset_prox_info *) ct->priv;
- int i;
-
- for (i = 0; i<info->rset_no; i++)
- rset_delete(info->rset[i]);
}
static RSFD r_open (RSET ct, int flag)
{
- struct rset_prox_info *info = (struct rset_prox_info *) ct->priv;
RSFD rfd;
struct rset_prox_rfd *p;
int i;
}
rfd = rfd_create_base(ct);
if (rfd->priv)
- p=(struct rset_prox_rfd *)(rfd->priv);
+ p = (struct rset_prox_rfd *)(rfd->priv);
else {
p = (struct rset_prox_rfd *) nmem_malloc(ct->nmem,sizeof(*p));
rfd->priv = p;
- p->more = nmem_malloc (ct->nmem,sizeof(*p->more) * info->rset_no);
- p->buf = nmem_malloc(ct->nmem,sizeof(*p->buf) * info->rset_no);
- p->terms = nmem_malloc(ct->nmem,sizeof(*p->terms) * info->rset_no);
- for (i = 0; i < info->rset_no; i++)
+ p->more = nmem_malloc (ct->nmem,sizeof(*p->more) * ct->no_children);
+ p->buf = nmem_malloc(ct->nmem,sizeof(*p->buf) * ct->no_children);
+ p->terms = nmem_malloc(ct->nmem,sizeof(*p->terms) * ct->no_children);
+ for (i = 0; i < ct->no_children; i++)
{
p->buf[i] = nmem_malloc(ct->nmem,ct->keycontrol->key_size);
p->terms[i] = 0;
}
- p->rfd = nmem_malloc(ct->nmem,sizeof(*p->rfd) * info->rset_no);
+ p->rfd = nmem_malloc(ct->nmem,sizeof(*p->rfd) * ct->no_children);
}
yaz_log(YLOG_DEBUG,"rsprox (%s) open [%p] n=%d",
- ct->control->desc, rfd, info->rset_no);
+ ct->control->desc, rfd, ct->no_children);
- for (i = 0; i < info->rset_no; i++) {
- p->rfd[i] = rset_open (info->rset[i], RSETF_READ);
+ for (i = 0; i < ct->no_children; i++) {
+ p->rfd[i] = rset_open (ct->children[i], RSETF_READ);
p->more[i] = rset_read (p->rfd[i], p->buf[i], &p->terms[i]);
}
p->hits = 0;
static void r_close (RSFD rfd)
{
- struct rset_prox_info *info = (struct rset_prox_info *)(rfd->rset->priv);
- struct rset_prox_rfd *p=(struct rset_prox_rfd *)(rfd->priv);
+ RSET ct = rfd->rset;
+ struct rset_prox_rfd *p = (struct rset_prox_rfd *)(rfd->priv);
int i;
- for (i = 0; i<info->rset_no; i++)
- rset_close (p->rfd[i]);
- rfd_delete_base(rfd);
+ for (i = 0; i<ct->no_children; i++)
+ rset_close(p->rfd[i]);
}
static int r_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf)
{
- struct rset_prox_info *info = (struct rset_prox_info *)(rfd->rset->priv);
+ RSET ct = rfd->rset;
+ struct rset_prox_info *info = (struct rset_prox_info *)(ct->priv);
struct rset_prox_rfd *p = (struct rset_prox_rfd *)(rfd->priv);
- const struct rset_key_control *kctrl = rfd->rset->keycontrol;
+ const struct rset_key_control *kctrl = ct->keycontrol;
int cmp = 0;
int i;
{
while (p->more[0])
{
- for (i = 1; i < info->rset_no; i++)
+ for (i = 1; i < ct->no_children; i++)
{
if (!p->more[i])
{
break;
}
}
- if (i == info->rset_no)
+ if (i == ct->no_children)
{
memcpy (buf, p->buf[0], kctrl->key_size);
if (term)
}
}
}
- else if (info->rset_no == 2)
+ else if (ct->no_children == 2)
{
while (p->more[0] && p->more[1])
{
static void r_pos (RSFD rfd, double *current, double *total)
{
- struct rset_prox_info *info = (struct rset_prox_info *)(rfd->rset->priv);
+ RSET ct = rfd->rset;
struct rset_prox_rfd *p = (struct rset_prox_rfd *)(rfd->priv);
int i;
double r = 0.0;
yaz_log(YLOG_DEBUG, "rsprox_pos");
- for (i = 0; i < info->rset_no; i++)
+ for (i = 0; i < ct->no_children; i++)
{
rset_pos(p->rfd[i], &cur, &tot);
if (tot>0) {
i,*current, *total, r);
}
-
-
static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm)
{
- struct rset_prox_info *info =
- (struct rset_prox_info *) ct->priv;
int i;
- for (i = 0; i<info->rset_no; i++)
- rset_getterms(info->rset[i], terms, maxterms, curterm);
+ for (i = 0; i<ct->no_children; i++)
+ rset_getterms(ct->children[i], terms, maxterms, curterm);
}
-/* $Id: rstemp.c,v 1.63 2005-05-03 09:11:36 adam Exp $
+/* $Id: rstemp.c,v 1.64 2005-05-24 11:35:43 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
rset_get_one_term,
r_open,
r_close,
- rset_default_forward,
+ 0, /* no forward */
r_pos,
r_read,
r_write,
RSET rstemp_create(NMEM nmem, struct rset_key_control *kcontrol,
int scope, const char *temp_path, TERMID term)
{
- RSET rnew = rset_create_base(&control, nmem, kcontrol, scope, term);
+ RSET rnew = rset_create_base(&control, nmem, kcontrol, scope, term,
+ 0, 0);
struct rset_private *info;
if (!log_level_initialized)
{
info->fd = -1;
}
}
- rfd_delete_base(rfd);
}
-/* $Id: t10.c,v 1.7 2005-04-28 11:25:24 adam Exp $
+/* $Id: t10.c,v 1.8 2005-05-24 11:35:43 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
struct tst tests[] = {
- {"ntc-atn", "first title", 1000, "first title", 1000, "second title",862 },
- {"ntc-ntn", "first title", 1000, "first title", 1000, "second title",862 },
- {"ntc-btn", "first title", 1000, "first title", 1000, "second title",862 },
- {"ntc-apn", "first title", 1000, "first title", 1000, "second title",862 },
- {"ntc-npn", "first title", 1000, "first title", 1000, "second title",862 },
- {"ntc-bpn", "first title", 1000, "first title", 1000, "second title",862 },
-
- {"atc-atn", "first title", 1000, "first title", 1000, "second title", 989 },
- {"atc-ntn", "first title", 1000, "first title", 1000, "second title", 989 },
- {"atc-btn", "first title", 1000, "first title", 1000, "second title", 989 },
- {"atc-apn", "first title", 1000, "first title", 1000, "second title", 989 },
- {"atc-npn", "first title", 1000, "first title", 1000, "second title", 989 },
- {"atc-bpn", "first title", 1000, "first title", 1000, "second title", 989 },
-
- {"npc-atn", "first title", 1000, "first title", 1000, "second title", 862 },
- {"npc-ntn", "first title", 1000, "first title", 1000, "second title", 862 },
- {"npc-btn", "first title", 1000, "first title", 1000, "second title", 862 },
- {"npc-apn", "first title", 1000, "first title", 1000, "second title", 862 },
- {"npc-npn", "first title", 1000, "first title", 1000, "second title", 862 },
- {"npc-bpn", "first title", 1000, "first title", 1000, "second title", 862 },
-
- {"apc-atn", "first title", 1000, "first title", 1000, "second title", 989 },
- {"apc-ntn", "first title", 1000, "first title", 1000, "second title", 989 },
- {"apc-btn", "first title", 1000, "first title", 1000, "second title", 989 },
- {"apc-apn", "first title", 1000, "first title", 1000, "second title", 989 },
- {"apc-npn", "first title", 1000, "first title", 1000, "second title", 989 },
- {"apc-bpn", "first title", 1000, "first title", 1000, "second title", 989 },
-
+ {"ntc-atn", "first title", 0, "first title", 1000, "first title",1000 },
+ {"ntc-ntn", "first title", 0, "first title", 1000, "first title",1000 },
+ {"ntc-btn", "first title", 0, "first title", 1000, "first title",1000 },
+ {"ntc-apn", "first title", 0, "first title", 1000, "first title",1000 },
+ {"ntc-npn", "first title", 0, "first title", 1000, "first title",1000 },
+ {"ntc-bpn", "first title", 0, "first title", 1000, "first title",1000 },
+ {"atc-atn", "first title", 0, "first title", 1000, "first title", 1000 },
+ {"atc-ntn", "first title", 0, "first title", 1000, "first title", 1000 },
+ {"atc-btn", "first title", 0, "first title", 1000, "first title", 1000 },
+ {"atc-apn", "first title", 0, "first title", 1000, "first title", 1000 },
+ {"atc-npn", "first title", 0, "first title", 1000, "first title", 1000 },
+ {"atc-bpn", "first title", 0, "first title", 1000, "first title", 1000 },
+
+ {"npc-atn", "first title", 0, "first title", 1000, "first title", 1000 },
+ {"npc-ntn", "first title", 0, "first title", 1000, "first title", 1000 },
+ {"npc-btn", "first title", 0, "first title", 1000, "first title", 1000 },
+ {"npc-apn", "first title", 0, "first title", 1000, "first title", 1000 },
+ {"npc-npn", "first title", 0, "first title", 1000, "first title", 1000 },
+ {"npc-bpn", "first title", 0, "first title", 1000, "first title", 1000 },
+
+ {"apc-atn", "first title", 0, "first title", 1000, "first title", 1000 },
+ {"apc-ntn", "first title", 0, "first title", 1000, "first title", 1000 },
+ {"apc-btn", "first title", 0, "first title", 1000, "first title", 1000 },
+ {"apc-apn", "first title", 0, "first title", 1000, "first title", 1000 },
+ {"apc-npn", "first title", 0, "first title", 1000, "first title", 1000 },
+ {"apc-bpn", "first title", 0, "first title", 1000, "first title", 1000 },
{0,0,0,0,0,0,0},
};
ZebraService zs = start_up("zebrazv.cfg", argc, argv);
ZebraHandle zh = zebra_open (zs);
- int loglevel = yaz_log_mask_str("zvrank,rank1,t10");
init_data(zh, recs);
zebra_close(zh);
- yaz_log_init_level(loglevel);
for (i = 0; tests[i].schema; i++)
{
zh = zebra_open (zs);
zebra_select_database(zh, "Default");
zebra_set_resource(zh, "zvrank.weighting-scheme", tests[i].schema);
- yaz_log(loglevel,"============%d: %s ===========", i, tests[i].schema);
+ yaz_log(YLOG_LOG,"============%d: %s ===========", i, tests[i].schema);
ranking_query( __LINE__, zh, "@attr 1=1016 @attr 2=102 the",
3, tests[i].hit1, tests[i].score1);
-/* $Id: t9.c,v 1.5 2005-01-15 19:38:35 adam Exp $
+/* $Id: t9.c,v 1.6 2005-05-24 11:35:43 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
init_data(zh, recs);
qry( zh, "@attr 1=1016 @attr 2=102 the",
- 3, "first title", 872 );
+ 3, "first title", 997 );
qry( zh, "@attr 1=1016 @attr 2=102 foo",
3, "second title", 850 );
/* get the record with the most significant hit, that is the 'bar' */
/* as that is the rarest of my search words */
qry( zh, "@attr 1=1016 @attr 2=102 @or @or the foo bar",
- 3, "third title", 895 );
+ 3, "third title", 940 );
return close_down(zh, zs, 0);
}