-## $Id: Makefile.am,v 1.28 2004-09-27 10:44:49 adam Exp $
+## $Id: Makefile.am,v 1.29 2004-10-28 10:37:15 heikki Exp $
noinst_PROGRAMS = apitest kdump
libidzebra_api_la_SOURCES = dir.c dirs.c trav.c kinput.c kcompare.c \
attribute.c symtab.c recindex.c recstat.c lockutil.c \
zebraapi.c zinfo.c invstat.c sortidx.c compact.c zsets.c zrpn.c \
- rank1.c trunc.c retrieve.c extract.c livcode.c \
+ rank1.c trunc.c retrieve.c extract.c \
index.h recindex.h recindxp.h \
zinfo.h zserver.h zvrank.c
+++ /dev/null
-/*
-
-The University of Liverpool
-
-Modifications to Zebra 1.1 / YAZ 1.7 to enable ranking
-by attribute weight.
-
-Copyright (c) 2001-2002 The University of Liverpool. All
-rights reserved.
-
-Licensed under the Academic Free License version 1.1.
-http://opensource.org/licenses/academic.php
-
-$Id: livcode.c,v 1.4 2004-10-26 15:32:11 heikki Exp $
-
-*/
-
-#ifdef SKIPTHIS /* Need to fix the interface - FIXME */
-
-#include <stdlib.h>
-#include <stdio.h>
-#ifdef WIN32
-#include <process.h>
-#else
-#include <unistd.h>
-#endif
-#include <assert.h>
-
-#include "index.h"
-#include "zserver.h"
-
-/*
-** These functions/routines
-** 1. reads in and builds a linked list of rank attr/rank score pairs
-** 2. expand a simple query into a paired list of complex/simple nodes.
-*/
-
-typedef struct rstype
-{
- struct rstype *next_rsnode ;
- int rank ;
- int score ;
- char *rankstr ;
-} rsnode, *refrsnode ;
-
-refrsnode start_rsnode = NULL ;
-
-/*
-** Function/Routine prototypes
-*/
-static int search_for_score( char *rankstr ) ;
-static char *search_for_rankstr( int rank ) ;
-static int search_for_rank( int rank ) ;
-static refrsnode set_rsnode( int rank, int score ) ;
-static int read_zrank_file(ZebraHandle zh) ;
-
-static void convert_simple2complex(ZebraHandle zh, Z_RPNStructure *rpnstruct ) ;
-static void walk_complex_query(ZebraHandle zh, Z_RPNStructure *rpnstruct ) ;
-static Z_Complex *expand_query(ZebraHandle zh, Z_Operand *thisop ) ;
-static Z_Complex *set_1complex_1operand( Z_Complex *comp,Z_Operand *simp ) ;
-static Z_Complex *set_2operands( Z_Operand *sim1,Z_Operand *sim2 ) ;
-static Z_Operand *set_operand( Z_Operand *thisop, int newattr ) ;
-static int check_operand_attrs( Z_Operand *thisop ) ;
-
-/*
-** search_for_score()
-** given a rank-string traverse down the linked list ;
-** return its score if found otherwise return -1.
-*/
-int search_for_score( char *rankstr )
-{
- refrsnode node = start_rsnode ;
- int rank ;
-
- if ( sscanf( rankstr,"%d",&rank ) )
- {
- while ( node )
- {
- if ( node->rank == rank ) return node->score ;
- node = node->next_rsnode ;
- }
- }
- return -1 ;
-}
-
-/*
-** search_for_rankstr()
-** given a rank traverse down the linked list ;
-** return its string if found otherwise return NULL.
-*/
-char *search_for_rankstr( int rank )
-{
- refrsnode node = start_rsnode ;
-
- while ( node )
- {
- if ( node->rank == rank ) return node->rankstr ;
- node = node->next_rsnode ;
- }
- return "rank" ;
-}
-
-/*
-** search_for_rank()
-** given a rank traverse down the linked list ;
-** return 1 if found otherwise return 0.
-*/
-int search_for_rank( int rank )
-{
- refrsnode node = start_rsnode ;
-
- while ( node )
- {
- if ( node->rank == rank ) return 1 ;
- node = node->next_rsnode ;
- }
- return 0 ;
-}
-
-/*
-** set_rsnode()
-** given a rank and a score, build the rest of the rsnode structure.
-*/
-refrsnode set_rsnode( int rank, int score )
-{
-#define BUFFMAX 128
- refrsnode node = (refrsnode)malloc( sizeof(rsnode) ) ;
- char buff[BUFFMAX] ;
-
- node->next_rsnode = NULL ;
- node->rank = rank ;
- node->score = score ;
-
- sprintf( buff,"%d",rank ) ;
- node->rankstr = (char *)malloc( strlen(buff)+1 ) ;
- strcpy( node->rankstr, buff ) ;
-
- return node ;
-}
-
-/*
-** read_zrank_file(zh)
-** read in the rankfile and build the rank/score linked list ;
-** return 0 : can't open the zebra config. file
-** return 0 : can't find the rankfile entry in the zebra config. file
-** return 0 : can't open the rankfile itself
-** return the number of distinct ranks read in.
-*/
-int read_zrank_file(ZebraHandle zh)
-{
-#define LINEMAX 256
- char line[ LINEMAX ] ;
- char rname[ LINEMAX ] ;
- char *lineptr ;
- FILE *ifd ;
- int rank = 0 ;
- int score = 0 ;
- int numranks = 0 ;
-
- /*
- ** open the zebra configuration file and look for the "rankfile:"
- ** entry which contains the path/name of the rankfile
- */
-
- const char *rankfile = res_get_def(zh->res, "rankfile", 0);
- const char *profilePath = res_get_def(zh->res, "profilePath",
- DEFAULT_PROFILE_PATH);
-
- if (!rankfile)
- {
- yaz_log(LOG_LOG, "rankfile entry not found in config file" ) ;
- return 0 ;
- }
- ifd = yaz_path_fopen(profilePath, rankfile, "r" ) ;
- if ( ifd )
- {
- while ( (lineptr = fgets( line,LINEMAX,ifd )) )
- {
- if ( sscanf( lineptr,"rankfile: %s", rname ) == 1 )
- rankfile = rname ;
- }
-
- /*
- ** open the rankfile and read the rank/score pairs
- ** ignore 1016
- ** ignore duplicate ranks
- ** ignore ranks without +ve scores
- */
- if ( rankfile )
- {
- if ( !(ifd = fopen( rankfile, "r" )) )
- {
- logf( LOG_LOG, "unable to open rankfile %s",rankfile ) ;
- return 0;
- }
-
- while ( (lineptr = fgets( line,LINEMAX,ifd )) )
- {
- sscanf( lineptr,"%d : %d", &rank,&score ) ;
- if ( ( score > 0 ) && ( rank != 1016 ) )
- {
- refrsnode new_rsnode ;
-
- if ( search_for_rank( rank ) == 0 )
- {
- new_rsnode = set_rsnode( rank,score ) ;
- new_rsnode->next_rsnode = start_rsnode ;
- start_rsnode = new_rsnode ;
- numranks++ ;
- }
- }
- }
- }
- else
- {
- yaz_log(LOG_WARN|LOG_ERRNO, "unable to open config file (%s)",
- rankfile);
- }
- }
- return numranks ;
-}
-
-/*
-** set_operand()
-** build an operand "node" - hav to make a complete copy of thisop and
-** then insert newattr in the appropriate place
-**
-*/
-Z_Operand *set_operand( Z_Operand *thisop, int newattr )
-{
- Z_Operand *operand ;
- Z_AttributesPlusTerm *attributesplusterm ;
- Z_AttributeList *attributelist ;
- Z_AttributeElement *attributeelement ;
- Z_AttributeElement *attrptr ;
- Z_AttributeElement **attrptrptr ;
- Z_Term *term ;
- Odr_oct *general ;
- int i ;
-
- operand = (Z_Operand *)
- malloc( sizeof( Z_Operand ) ) ;
- attributesplusterm = (Z_AttributesPlusTerm *)
- malloc( sizeof( Z_AttributesPlusTerm ) ) ;
- attributelist = (Z_AttributeList *)
- malloc( sizeof( Z_AttributeList ) ) ;
- attributeelement = (Z_AttributeElement *)
- malloc( sizeof( Z_AttributeElement ) ) ;
- term = (Z_Term *)
- malloc( sizeof( Z_Term ) ) ;
- general = (Odr_oct *)
- malloc( sizeof( Odr_oct ) ) ;
-
- operand->which = Z_Operand_APT ;
- operand->u.attributesPlusTerm = attributesplusterm ;
-
- attributesplusterm->attributes = attributelist ;
- attributesplusterm->term = term ;
-
- attributelist->num_attributes = thisop->u.attributesPlusTerm->
- attributes->num_attributes ;
-
- attrptr = (Z_AttributeElement *) malloc( sizeof(Z_AttributeElement) *
- attributelist->num_attributes ) ;
- attrptrptr = (Z_AttributeElement **) malloc( sizeof(Z_AttributeElement) *
- attributelist->num_attributes ) ;
-
- attributelist->attributes = attrptrptr ;
-
- for ( i = 0 ; i < attributelist->num_attributes ; i++ )
- {
- *attrptr = *thisop->u.attributesPlusTerm->attributes->attributes[i] ;
-
- attrptr->attributeType = (int *)malloc( sizeof(int *) ) ;
- *attrptr->attributeType = *thisop->u.attributesPlusTerm->attributes->
- attributes[i]->attributeType;
-
- attrptr->value.numeric = (int *)malloc( sizeof(int *) ) ;
- *attrptr->value.numeric = *thisop->u.attributesPlusTerm->attributes->
- attributes[i]->value.numeric;
-
- if ( (*attrptr->attributeType == 1) &&
- (*attrptr->value.numeric == 1016) )
- {
- *attrptr->value.numeric = newattr ;
- }
- *attrptrptr++ = attrptr++ ;
- }
-
- term->which = Z_Term_general ;
- term->u.general = general ;
-
- general->len = thisop->u.attributesPlusTerm->term->u.general->len ;
- general->size = thisop->u.attributesPlusTerm->term->u.general->size ;
- general->buf = malloc( general->size ) ;
- strcpy( general->buf,
- thisop->u.attributesPlusTerm->term->u.general->buf ) ;
-
- return operand ;
-}
-
-/*
-** set_2operands()
-** build a complex "node" with two (simple) operand "nodes" as branches
-*/
-Z_Complex *set_2operands( Z_Operand *sim1,Z_Operand *sim2 )
-{
- Z_Complex *top ;
- Z_RPNStructure *s1 ;
- Z_RPNStructure *s2 ;
- Z_Operator *roperator ;
-
- top = (Z_Complex *) malloc( sizeof( Z_Complex ) ) ;
- s1 = (Z_RPNStructure *)malloc( sizeof( Z_RPNStructure ) ) ;
- s2 = (Z_RPNStructure *)malloc( sizeof( Z_RPNStructure ) ) ;
- roperator = (Z_Operator *) malloc( sizeof( Z_Operator ) ) ;
-
- top->roperator = roperator ;
- top->roperator->which = Z_Operator_or ;
- top->roperator->u.op_or = odr_nullval() ;
-
- top->s1 = s1 ;
- top->s1->which = Z_RPNStructure_simple ;
- top->s1->u.simple = sim1 ;
-
- top->s2 = s2 ;
- top->s2->which = Z_RPNStructure_simple ;
- top->s2->u.simple = sim2 ;
-
- return top ;
-}
-
-/*
-** set_1complex_1operand()
-** build a complex "node" with a complex "node" branch and an
-** operand "node" branch
-*/
-Z_Complex *set_1complex_1operand( Z_Complex *comp,Z_Operand *simp )
-{
- Z_Complex *top ;
- Z_RPNStructure *s1 ;
- Z_RPNStructure *s2 ;
- Z_Operator *roperator ;
-
- top = (Z_Complex *) malloc( sizeof( Z_Complex ) ) ;
- s1 = (Z_RPNStructure *)malloc( sizeof( Z_RPNStructure ) ) ;
- s2 = (Z_RPNStructure *)malloc( sizeof( Z_RPNStructure ) ) ;
- roperator = (Z_Operator *) malloc( sizeof( Z_Operator ) ) ;
-
- top->roperator = roperator ;
- top->roperator->which = Z_Operator_or ;
- top->roperator->u.op_or = odr_nullval() ;
-
- top->s1 = s1 ;
- top->s1->which = Z_RPNStructure_complex ;
- top->s1->u.complex = comp ;
-
- top->s2 = s2 ;
- top->s2->which = Z_RPNStructure_simple ;
- top->s2->u.simple = simp ;
-
- return top ;
-}
-
-/*
-** expand_query()
-** expand a simple query into a number of complex queries
-*/
-Z_Complex *expand_query(ZebraHandle zh, Z_Operand *thisop )
-{
- Z_Complex *top ;
- int numattrs = 0 ;
-
- /*
- ** start_rsnode will be set if we have already read the rankfile
- ** so don't bother again but we need to know the number of attributes
- ** in the linked list so traverse it again to find out how many.
- */
- if ( start_rsnode )
- {
- refrsnode node = start_rsnode ;
- while ( node )
- {
- numattrs++ ;
- node = node->next_rsnode ;
- }
- }
-
- /*
- ** only expand the query if there are 2 or more attributes
- */
- if ( numattrs >= 2 )
- {
- refrsnode node = start_rsnode ;
- int attr1 ;
- int attr2 ;
-
- attr1 = node->rank ; node = node->next_rsnode ;
- attr2 = node->rank ; node = node->next_rsnode ;
-
- /*
- ** this is the special case and has to be done first because the
- ** last complex node in the linear list has two simple nodes whereas
- ** all the others have a complex and a simple.
- */
- top = set_2operands( set_operand( thisop,attr1 ),
- set_operand( thisop,attr2 ) ) ;
-
- /*
- ** do the rest as complex/simple pairs
- */
- while ( node )
- {
- attr1 = node->rank ; node = node->next_rsnode ;
- top = set_1complex_1operand( top,set_operand( thisop,attr1 ) ) ;
- }
- /*
- ** finally add the 1016 rank attribute at the top of the tree
- */
- top = set_1complex_1operand( top,set_operand( thisop,1016 ) ) ;
-
- return top ;
- }
- else return NULL ;
-}
-
-/*
-** check_operand_attrs()
-** loop through the attributes of a particular operand
-** return 1 if (type==1 && value==1016) && (type==2 && value==102)
-** otherwise return 0
-*/
-int check_operand_attrs( Z_Operand *thisop )
-{
- Z_AttributeElement *attrptr ;
- int cond1 = 0 ;
- int cond2 = 0 ;
- int numattrs ;
- int i ;
-
- numattrs = thisop->u.attributesPlusTerm->attributes->num_attributes ;
-
- for ( i = 0 ; i < numattrs ; i++ )
- {
- attrptr = thisop->u.attributesPlusTerm->attributes->attributes[i] ;
-
- if ( (*attrptr->attributeType == 1) &&
- (*attrptr->value.numeric == 1016) )
- cond1 = 1 ;
-
- if ( (*attrptr->attributeType == 2) &&
- (*attrptr->value.numeric == 102) )
- cond2 = 1 ;
- }
-
- return (cond1 & cond2) ;
-}
-
-/*
-** convert_simple2complex()
-**
-*/
-void convert_simple2complex(ZebraHandle zh, Z_RPNStructure *rpnstruct )
-{
- Z_Complex *complex = NULL ;
- Z_Operand *operand = rpnstruct->u.simple ;
-
- if ( check_operand_attrs( operand ) )
- {
- complex = expand_query(zh, operand ) ;
-
- if ( complex )
- {
- /*
- ** Everything is complete so replace the original
- ** operand with the newly built complex structure
- ** This is it ... no going back!!
- */
- rpnstruct->which = Z_RPNStructure_complex ;
- rpnstruct->u.complex = complex ;
- }
- }
-}
-
-/*
-** walk_complex_query()
-** recursively traverse the tree expanding any simple queries we find
-*/
-void walk_complex_query(ZebraHandle zh, Z_RPNStructure *rpnstruct )
-{
- if ( rpnstruct->which == Z_RPNStructure_simple )
- {
- convert_simple2complex(zh, rpnstruct ) ;
- }
- else
- {
- walk_complex_query(zh, rpnstruct->u.complex->s1 ) ;
- walk_complex_query(zh, rpnstruct->u.complex->s2 ) ;
- }
-}
-
-void zebra_livcode_transform(ZebraHandle zh, Z_RPNQuery *query)
-{
- /*
- ** Got a search request,
- ** 1. if it is a simple query, see if it suitable for expansion
- ** i.e. the attributes are of the form ...
- ** (type==1 && value==1016) && (type==2 && value==102)
- ** or
- ** 2. if it is complex, traverse the complex query tree and expand
- ** any simples simples as above
- */
-#if LIV_CODE
- Z_RPNStructure *rpnstruct = query->RPNStructure ;
-
- if ( rpnstruct->which == Z_RPNStructure_simple )
- {
- convert_simple2complex(zh, rpnstruct ) ;
- }
- else if ( rpnstruct->which == Z_RPNStructure_complex )
- {
- walk_complex_query(zh, rpnstruct ) ;
- }
-#endif
-}
-
-
-struct rank_class_info {
- int dummy;
-};
-
-struct rank_term_info {
- int local_occur;
- int global_occur;
- int global_inv;
- int rank_flag;
-};
-
-struct rank_set_info {
- int last_pos;
- int no_entries;
- int no_rank_entries;
- NMEM nmem;
- struct rank_term_info *entries;
-};
-
-static int log2_int (unsigned g)
-{
- int n = 0;
- while ((g = g>>1))
- n++;
- return n;
-}
-
-/*
- * create: Creates/Initialises this rank handler. This routine is
- * called exactly once. The routine returns the class_handle.
- */
-static void *create (ZebraHandle zh)
-{
- struct rank_class_info *ci = (struct rank_class_info *)
- xmalloc (sizeof(*ci));
-
- logf (LOG_DEBUG, "livrank create");
-
- read_zrank_file(zh) ;
-
- return ci;
-}
-
-/*
- * destroy: Destroys this rank handler. This routine is called
- * when the handler is no longer needed - i.e. when the server
- * dies. The class_handle was previously returned by create.
- */
-static void destroy (struct zebra_register *reg, void *class_handle)
-{
- struct rank_class_info *ci = (struct rank_class_info *) class_handle;
-
- logf (LOG_DEBUG, "livrank destroy");
- xfree (ci);
-}
-
-
-/*
- * begin: Prepares beginning of "real" ranking. Called once for
- * each result set. The returned handle is a "set handle" and
- * will be used in each of the handlers below.
- */
-static void *begin (struct zebra_register *reg, void *class_handle,
- RSET rset, NMEM nmem)
-{
- struct rank_set_info *si = (struct rank_set_info *) xmalloc (sizeof(*si));
- int i;
-
- logf (LOG_DEBUG, "livrank begin");
- /* FIXME - Now that we don't have term counts in rsets, what do we */
- /* do about this ??? */
- si->no_entries = 0; /* rset->no_rset_terms; */ /* FIXME ??? */
- si->no_rank_entries = 0;
- si->nmem=nmem;
- si->entries = (struct rank_term_info *)
- xmalloc (sizeof(*si->entries)*si->no_entries);
- for (i = 0; i < si->no_entries; i++)
- {
- const char *flags = ""; /* rset->rset_terms[i]->flags; *//* FIXME ???*/
- int g = 0; /* rset->rset_terms[i]->nn; */ /* FIXME ??? */
- const char *cp = strstr(flags, ",u=");
-
- si->entries[i].rank_flag = 1;
- if (cp)
- {
- char *t = search_for_rankstr(atoi(cp+3));
- if (t)
- si->entries[i].rank_flag = search_for_score(t) ;
- }
- if ( si->entries[i].rank_flag )
- (si->no_rank_entries)++;
-
- si->entries[i].local_occur = 0;
- si->entries[i].global_occur = g;
- si->entries[i].global_inv = 32 - log2_int (g);
- logf (LOG_DEBUG, "-------- %d ------", 32 - log2_int (g));
- }
- return si;
-}
-
-/*
- * end: Terminates ranking process. Called after a result set
- * has been ranked.
- */
-static void end (struct zebra_register *reg, void *set_handle)
-{
- struct rank_set_info *si = (struct rank_set_info *) set_handle;
- logf (LOG_DEBUG, "livrank end");
- xfree (si->entries);
- xfree (si);
-}
-
-/*
- * add: Called for each word occurence in a result set. This routine
- * should be as fast as possible. This routine should "incrementally"
- * update the score.
- */
-static void add (void *set_handle, int seqno, int term_index)
-{
- struct rank_set_info *si = (struct rank_set_info *) set_handle;
- logf (LOG_DEBUG, "rank-1 add seqno=%d term_index=%d", seqno, term_index);
- si->last_pos = seqno;
- si->entries[term_index].local_occur++;
-}
-
-/*
- * calc: Called for each document in a result. This handler should
- * produce a score based on previous call(s) to the add handler. The
- * score should be between 0 and 1000. If score cannot be obtained
- * -1 should be returned.
- */
-static int calc (void *set_handle, zint sysno)
-{
- int i, lo, divisor, score = 0;
- struct rank_set_info *si = (struct rank_set_info *) set_handle;
-
- logf (LOG_DEBUG, "livrank calc sysno=" ZINT_FORMAT, sysno);
-
- if (!si->no_rank_entries)
- return -1;
- for (i = 0; i < si->no_entries; i++)
- {
- score += si->entries[i].local_occur * si->entries[i].rank_flag ;
- }
- for (i = 0; i < si->no_entries; i++)
- if (si->entries[i].rank_flag && (lo = si->entries[i].local_occur))
- score += (8+log2_int (lo)) * si->entries[i].global_inv;
- score *= 34;
- divisor = si->no_rank_entries * (8+log2_int (si->last_pos/si->no_entries));
- score = score / divisor;
- if (score > 1000)
- score = 1000;
- for (i = 0; i < si->no_entries; i++)
- si->entries[i].local_occur = 0;
- return score;
-}
-
-/*
- * Pseudo-meta code with sequence of calls as they occur in a
- * server. Handlers are prefixed by --:
- *
- * server init
- * -- create
- * foreach search
- * rank result set
- * -- begin
- * foreach record
- * foreach word
- * -- add
- * -- calc
- * -- end
- * -- destroy
- * server close
- */
-
-static struct rank_control rank_control = {
- "livrank",
- create,
- destroy,
- begin,
- end,
- calc,
- add,
-};
-
-struct rank_control *rankliv_class = &rank_control;
-#endif
-
-/* $Id: rank1.c,v 1.18 2004-10-26 15:32:11 heikki Exp $
+/* $Id: rank1.c,v 1.19 2004-10-28 10:37:15 heikki Exp $
Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003
Index Data Aps
#include <unistd.h>
#endif
-#define DEBUG_RANK 1
+#define DEBUG_RANK 0
#include "index.h"
-/* $Id: zebraapi.c,v 1.137 2004-10-26 15:32:11 heikki Exp $
+/* $Id: zebraapi.c,v 1.138 2004-10-28 10:37:15 heikki Exp $
Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
Index Data Aps
reg->ptr_i=0;
zebraRankInstall (reg, rank1_class);
-#ifdef SKIPTHIS /* FIXME - those ranks not yet converted to new interface */
zebraRankInstall (reg, rankzv_class);
- zebraRankInstall (reg, rankliv_class);
-#endif
recordCompression = res_get_def (res, "recordCompression", "none");
if (!strcmp (recordCompression, "none"))
if (zebra_begin_read (zh))
return 1;
-#ifdef SKIPTHIS /* FIXME - livcode rank not yet available */
- zebra_livcode_transform(zh, query);
-#endif
-
resultSetAddRPN (zh, odr_extract_mem(o), query,
zh->num_basenames, zh->basenames, setname);
&recs[i].len,
&recs[i].base);
recs[i].errString = NULL;
+ recs[i].score=poset[i].score;
+ recs[i].sysno=poset[i].sysno;
}
else
{
-/* $Id: zvrank.c,v 1.11 2004-10-26 15:32:11 heikki Exp $
+/* $Id: zvrank.c,v 1.12 2004-10-28 10:37:15 heikki Exp $
Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003
Index Data Aps
** "ntc-atn", "atc-atn", etc.
*/
-#if SKIPTHIS /* FIXME - Disabled while changing the interface to ranking */
#include <math.h> /* for log */
rs->db_terms=500000; /* assign correct value here (for debugging) */
rs->db_f_max=50; /* assign correct value here */
rs->db_f_max_str="a"; /* assign correct value here (for debugging) */
+ /* FIXME - get those values from somewhere */
zv_init_scheme(rs, rscheme);
return;
}
* will be used in each of the handlers below.
*/
static void *zv_begin(struct zebra_register *reg, void *class_handle,
- RSET rset, NMEM nmem)
+ RSET rset, NMEM nmem, TERMID *terms, int numterms)
{
- struct rs_info *rs=(struct rs_info *)xmalloc(sizeof(*rs));
+ struct rs_info *rs=(struct rs_info *)nmem_malloc(nmem,sizeof(*rs));
struct rank_class_info *ci=(struct rank_class_info *)class_handle;
int i;
int veclen;
+ int *ip;
zint gocc;
/**/
yaz_log(LOG_DEBUG, "zv_begin");
- veclen= 0 ; /* rset->no_rset_terms;*/ /* smaller vector here */
- /* FIXME - Now that we don't have term lists in rsets, what do */
- /* we do here ??? */
+ veclen= numterms;
zv_init(rs, ci->rscheme);
rs->nmem=nmem;
rs->veclen=veclen;
prn_rs(rs);
- rs->qdoc=(struct ds_info *)xmalloc(sizeof(*rs->qdoc));
- rs->qdoc->terms=(struct ts_info *)xmalloc(sizeof(*rs->qdoc->terms)*rs->veclen);
+ rs->qdoc=(struct ds_info *)nmem_malloc(nmem,sizeof(*rs->qdoc));
+ rs->qdoc->terms=(struct ts_info *)nmem_malloc(nmem,
+ sizeof(*rs->qdoc->terms)*rs->veclen);
rs->qdoc->veclen=veclen;
rs->qdoc->d_f_max=1; /* no duplicates */
rs->qdoc->d_f_max_str="";
- rs->rdoc=(struct ds_info *)xmalloc(sizeof(*rs->rdoc));
- rs->rdoc->terms=(struct ts_info *)xmalloc(sizeof(*rs->rdoc->terms)*rs->veclen);
+ rs->rdoc=(struct ds_info *)nmem_malloc(nmem,sizeof(*rs->rdoc));
+ rs->rdoc->terms=(struct ts_info *)nmem_malloc(nmem,
+ sizeof(*rs->rdoc->terms)*rs->veclen);
rs->rdoc->veclen=veclen;
rs->rdoc->d_f_max=10; /* just a guess */
rs->rdoc->d_f_max_str="";
/* yaz_log(LOG_DEBUG, "zv_begin_init"); */
for (i = 0; i < rs->veclen; i++)
{
- gocc= 0; /* rset->rset_terms[i]->nn; */ /* FIXME ??? */
+ gocc= rset_count(terms[i]->rset);
+ terms[i]->rankpriv=ip=nmem_malloc(nmem, sizeof(int));
+ *ip=i; /* save the index for add() */
/* yaz_log(LOG_DEBUG, "zv_begin_init i=%d gocc=%d", i, gocc); */
rs->qdoc->terms[i].gocc=gocc;
rs->qdoc->terms[i].locc=1; /* assume query has no duplicate terms */
*/
static void zv_end (struct zebra_register *reg, void *rsi)
{
- RS rs=(RS)rsi;
yaz_log(LOG_DEBUG, "zv_end");
- xfree(rs->qdoc->terms);
- xfree(rs->rdoc->terms);
- xfree(rs->qdoc);
- xfree(rs->rdoc);
- xfree(rs);
+ /* they all are nmem'd */
return;
}
* should be as fast as possible. This routine should "incrementally"
* update the score.
*/
-static void zv_add (void *rsi, int seqno, int i) {
+static void zv_add (void *rsi, int seqno, TERMID term) {
RS rs=(RS)rsi;
- /* yaz_log(LOG_DEBUG, "zvrank zv_add seqno=%d term_index=%d", seqno, term_index);*/
+ int *ip = term->rankpriv;
+ int i=*ip;
rs->rdoc->terms[i].locc++;
+ yaz_log(LOG_DEBUG, "zvrank zv_add seqno=%d '%s' term_index=%d cnt=%d",
+ seqno, term->name, i, rs->rdoc->terms[i].locc );
}
/*
(*rs->d_norm_fct)(rs, rs->rdoc);
dscore=rs->sim_fct(rs->qdoc, rs->rdoc);
}
- score = (int) dscore * 1000;
- yaz_log (LOG_LOG, "sysno=" ZINT_FORMAT " score=%d", sysno, score);
+ score = (int) (dscore * 1000 +.5);
+ yaz_log (LOG_DEBUG, "zv_calc: sysno=" ZINT_FORMAT " score=%d",
+ sysno, score);
if (score > 1000) /* should not happen */
score = 1000;
return (int) score;
struct rank_control *rankzv_class = &rank_control_vsm;
-#endif /* SKIPTHIS */
/* EOF */
-# $Id: Makefile.am,v 1.19 2004-10-24 13:34:45 adam Exp $
+# $Id: Makefile.am,v 1.20 2004-10-28 10:37:15 heikki Exp $
noinst_PROGRAMS = testclient
testclient_SOURCES = testclient.c
-check_PROGRAMS = t1 t2 t3 t4 t5 t6 t7 t8
+check_PROGRAMS = t1 t2 t3 t4 t5 t6 t7 t8 t9 t10
TESTS = $(check_PROGRAMS)
EXTRA_DIST=zebra.cfg zebra6.cfg zebra8.cfg
t6_SOURCES = t6.c
t7_SOURCES = t7.c
t8_SOURCES = t8.c
+# t9 and t10 share the helper library and the sample records; the headers
+# are listed so that automake ships them in the distribution tarball.
+t9_SOURCES = t9.c testlib.c testlib.h rankingrecords.h
+t10_SOURCES = t10.c testlib.c testlib.h rankingrecords.h
AM_CPPFLAGS = -I$(top_srcdir)/include $(YAZINC)
--- /dev/null
+/* $Id: rankingrecords.h,v 1.1 2004-10-28 10:37:15 heikki Exp $
+ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
+ Index Data Aps
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra. If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+*/
+
+/** rankingrecords.h - shared test data for t9 and t10 */
+
+/*
+ * Sample records fed to zebra_add_record() by the ranking tests.
+ * The list is terminated by a null pointer; users iterate until they
+ * reach it.  All three records contain "the", "foo" and "bar", but each
+ * one repeats a different one of those words more often than the others,
+ * which is what the ranking tests rely on.
+ */
+const char *recs[] = {
+ /* record 1: "the" is the most repeated word */
+ "<gils>\n"
+ " <title>The first title</title>\n"
+ " <abstract> \n"
+ " The first common word is the: the the the \n"
+ " The second common word is word \n"
+ " but all have the foo bar \n"
+ " </abstract>\n"
+ "</gils>\n",
+
+ /* record 2: "foo" is repeated */
+ "<gils>\n"
+ " <title>The second title</title>\n"
+ " <abstract> \n"
+ " The first common word is the: the \n"
+ " The second common word is foo: foo foo \n"
+ " but all have the foo bar \n"
+ " </abstract>\n"
+ "</gils>\n",
+
+ /* record 3: "bar" is repeated */
+ "<gils>\n"
+ " <title>The third title</title>\n"
+ " <abstract> \n"
+ " The first common word is the: the \n"
+ " The third common word is bar: bar \n"
+ " but all have the foo bar \n"
+ " </abstract>\n"
+ "</gils>\n",
+
+ /* end-of-list marker */
+ 0 };
+
+
--- /dev/null
+/* $Id: t10.c,v 1.1 2004-10-28 10:37:15 heikki Exp $
+ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
+ Index Data Aps
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra. If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+*/
+
+/** t10.c - test zv-rank */
+
+#include <yaz/log.h>
+#include <yaz/pquery.h>
+#include <idzebra/api.h>
+#include "testlib.h"
+#include "rankingrecords.h"
+
+#define qry(zh,query,hits,string,score) \
+ RankingQuery(__LINE__,(zh),(query),(hits),(string),(score))
+
+/*
+ * Expected results for one weighting scheme.  main() runs three fixed
+ * queries per scheme; each hitN/scoreN pair describes the first record
+ * returned by the N-th query.
+ */
+struct tst {
+ char *schema; /* value stored in the "zvrank.weighting-scheme" resource */
+ char *hit1; /* text expected in the top record for the query "the" */
+ int score1; /* expected score of that record */
+ char *hit2; /* text expected in the top record for "@or foo bar" */
+ int score2; /* expected score of that record */
+ char *hit3; /* text expected in the top record for "@or @or the foo bar" */
+ int score3; /* expected score of that record */
+};
+
+
+
+/*
+ * Expectation table, one row per weighting scheme, grouped by the part of
+ * the scheme name before the dash.  Columns follow struct tst:
+ * scheme, hit1, score1, hit2, score2, hit3, score3.
+ * The all-zero row ends the table; main() stops at the first null schema.
+ */
+struct tst tests[] = {
+ /* ntc-* schemes */
+ {"ntc-atn", "first title", 1000, "first title", 1000, "third title", 826 },
+ {"ntc-ntn", "first title", 1000, "first title", 1000, "third title", 826 },
+ {"ntc-btn", "first title", 1000, "first title", 1000, "third title", 826 },
+ {"ntc-apn", "first title", 1000, "first title", 1000, "third title", 826 },
+ {"ntc-npn", "first title", 1000, "first title", 1000, "third title", 826 },
+ {"ntc-bpn", "first title", 1000, "first title", 1000, "third title", 826 },
+
+ /* atc-* schemes */
+ {"atc-atn", "first title", 1000, "first title", 1000, "first title", 972 },
+ {"atc-ntn", "first title", 1000, "first title", 1000, "first title", 972 },
+ {"atc-btn", "first title", 1000, "first title", 1000, "first title", 972 },
+ {"atc-apn", "first title", 1000, "first title", 1000, "first title", 972 },
+ {"atc-npn", "first title", 1000, "first title", 1000, "first title", 972 },
+ {"atc-bpn", "first title", 1000, "first title", 1000, "first title", 972 },
+
+ /* npc-* schemes */
+ {"npc-atn", "first title", 1000, "first title", 1000, "third title", 826 },
+ {"npc-ntn", "first title", 1000, "first title", 1000, "third title", 826 },
+ {"npc-btn", "first title", 1000, "first title", 1000, "third title", 826 },
+ {"npc-apn", "first title", 1000, "first title", 1000, "third title", 826 },
+ {"npc-npn", "first title", 1000, "first title", 1000, "third title", 826 },
+ {"npc-bpn", "first title", 1000, "first title", 1000, "third title", 826 },
+
+ /* apc-* schemes */
+ {"apc-atn", "first title", 1000, "first title", 1000, "first title", 972 },
+ {"apc-ntn", "first title", 1000, "first title", 1000, "first title", 972 },
+ {"apc-btn", "first title", 1000, "first title", 1000, "first title", 972 },
+ {"apc-apn", "first title", 1000, "first title", 1000, "first title", 972 },
+ {"apc-npn", "first title", 1000, "first title", 1000, "first title", 972 },
+ {"apc-bpn", "first title", 1000, "first title", 1000, "first title", 972 },
+
+ /* terminator row */
+ {0,0,0,0,0,0,0},
+};
+
+/*
+ * Test driver for the zvrank ranking handler.
+ *
+ * Builds a fresh register from the shared sample records, then, for every
+ * row of the tests[] table, opens a new session with that row's weighting
+ * scheme and runs three ranked queries, checking the top record and its
+ * score against the table.  Any mismatch terminates the program from
+ * inside RankingQuery().
+ */
+int main(int argc, char **argv)
+{
+    ZebraService service;
+    ZebraHandle session;
+    char *errinfo;
+    int status;
+    int n;
+
+    /* send all log output of this program to its own file */
+    yaz_log_init_file("t10.log");
+
+    nmem_init ();
+
+    /* open a session on the zvrank configuration and wipe the register */
+    service = start_service("zebrazv.cfg");
+    session = zebra_open (service);
+    zebra_select_database(session, "Default");
+    logf(LOG_LOG,"going to call init");
+    status = zebra_init(session);
+    logf(LOG_LOG,"init returned %d",status);
+    if (status != 0)
+    {
+        printf("init failed with %d\n",status);
+        zebra_result(session, &status, &errinfo);
+        printf(" Error %d %s\n",status,errinfo);
+        exit(1);
+    }
+
+    /* index every sample record in one transaction and commit it */
+    zebra_begin_trans (session, 1);
+    n = 0;
+    while (recs[n])
+    {
+        zebra_add_record (session, recs[n], strlen(recs[n]));
+        n++;
+    }
+    zebra_end_trans (session);
+    zebra_commit (session);
+
+    /* the indexing session is finished; each scheme gets its own one */
+    zebra_close(session);
+
+    n = 0;
+    while (tests[n].schema)
+    {
+        session = zebra_open (service);
+        zebra_select_database(session, "Default");
+        zebra_set_resource(session, "zvrank.weighting-scheme",
+                           tests[n].schema);
+        logf(LOG_LOG,"============%d: %s ============", n,tests[n].schema);
+
+        /* single term */
+        RankingQuery( __LINE__, session, "@attr 1=1016 @attr 2=102 the",
+                      3, tests[n].hit1, tests[n].score1);
+        /* two terms */
+        RankingQuery( __LINE__, session,
+                      "@attr 1=1016 @attr 2=102 @or foo bar",
+                      3, tests[n].hit2, tests[n].score2);
+        /* three terms */
+        RankingQuery( __LINE__, session,
+                      "@attr 1=1016 @attr 2=102 @or @or the foo bar",
+                      3, tests[n].hit3, tests[n].score3);
+
+        zebra_close(session);
+        n++;
+    }
+
+    /* shut the service down and report outstanding allocations */
+    zebra_stop (service);
+
+    nmem_exit ();
+    xmalloc_trav ("x");
+    logf(LOG_LOG,"============ ALL TESTS PASSED OK ============");
+    exit (0);
+}
--- /dev/null
+/* $Id: t9.c,v 1.1 2004-10-28 10:37:15 heikki Exp $
+ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
+ Index Data Aps
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra. If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+*/
+
+/** t9.c - test rank-1 */
+
+#include <yaz/log.h>
+#include <yaz/pquery.h>
+#include <idzebra/api.h>
+#include "testlib.h"
+#include "rankingrecords.h"
+
+#define qry(zh,query,hits,string,score) \
+ RankingQuery(__LINE__,(zh),(query),(hits),(string),(score))
+
+/*
+ * Test driver for the rank-1 ranking handler.
+ *
+ * Builds a fresh register from the shared sample records using the
+ * default configuration and runs three ranked queries, checking for each
+ * the hit count, the top record and its score.  Any mismatch terminates
+ * the program from inside RankingQuery().
+ */
+int main(int argc, char **argv)
+{
+    ZebraService service;
+    ZebraHandle session;
+    char *errinfo;
+    int status;
+    int n;
+
+    /* send all log output of this program to its own file */
+    yaz_log_init_file("t9.log");
+
+    nmem_init ();
+
+    /* an empty name makes start_service() fall back to zebra.cfg */
+    service = start_service("");
+    session = zebra_open (service);
+    zebra_select_database(session, "Default");
+    logf(LOG_LOG,"going to call init");
+    status = zebra_init(session);
+    logf(LOG_LOG,"init returned %d",status);
+    if (status != 0)
+    {
+        printf("init failed with %d\n",status);
+        zebra_result(session, &status, &errinfo);
+        printf(" Error %d %s\n",status,errinfo);
+        exit(1);
+    }
+
+    /* index every sample record in one transaction */
+    zebra_begin_trans (session, 1);
+    n = 0;
+    while (recs[n])
+    {
+        zebra_add_record (session, recs[n], strlen(recs[n]));
+        n++;
+    }
+    zebra_end_trans (session);
+
+    zebra_select_database(session, "Default");
+
+    /* "the" is repeated most in the first record */
+    qry( session, "@attr 1=1016 @attr 2=102 the",
+         3, "first title", 872 );
+
+    /* "foo" is repeated most in the second record */
+    qry( session, "@attr 1=1016 @attr 2=102 foo",
+         3, "second title", 850 );
+
+    /* with all three words, the record repeating "bar" (the rarest of */
+    /* the search words) is expected to come first */
+    qry( session, "@attr 1=1016 @attr 2=102 @or @or the foo bar",
+         3, "third title", 895 );
+
+    /* commit, then tear everything down and report allocations */
+    zebra_commit (session);
+    zebra_close (session);
+    zebra_stop (service);
+
+    nmem_exit ();
+    xmalloc_trav ("x");
+    exit (0);
+}
--- /dev/null
+/* $Id: testlib.c,v 1.1 2004-10-28 10:37:15 heikki Exp $
+ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
+ Index Data Aps
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra. If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+*/
+
+/** testlib - utilities for the api tests */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <yaz/log.h>
+#include <yaz/pquery.h>
+#include <idzebra/api.h>
+
+
+/**
+ * Starts a Zebra service from a configuration file.
+ *
+ * The file is looked up in the directory named by the environment
+ * variable "srcdir"; when that is unset or empty the current directory
+ * is used.
+ *
+ * \param cfgname  name of the configuration file; NULL or "" selects
+ *                 "zebra.cfg"
+ * \return the service handle.  The function does not return when the
+ *         service cannot be started: it prints the path it tried and
+ *         exits with status 1, like the other checks in this file.
+ */
+ZebraService start_service(char *cfgname)
+{
+    char cfg[256];
+    char *srcdir = getenv("srcdir");
+    ZebraService zs;
+
+    if (!srcdir || ! *srcdir)
+        srcdir=".";
+    if (!cfgname || ! *cfgname )
+        cfgname="zebra.cfg";
+
+    /* both parts are bounded so the result always fits in cfg: */
+    /* 200 + 1 ('/') + 50 + 1 (NUL) = 252 bytes at most */
+    sprintf(cfg, "%.200s/%.50s", srcdir, cfgname);
+
+    zs = zebra_start(cfg);
+    if (!zs)
+    {
+        /* callers hand the result straight to zebra_open(), so stop */
+        /* here with a readable message instead */
+        printf("Error: zebra_start failed for config file '%s'\n", cfg);
+        exit(1);
+    }
+    return zs;
+}
+
+/**
+ * Runs one ranked query and verifies its outcome.
+ *
+ * Checks the number of hits and, for the first record of the result
+ * set, that it contains a given string and carries a given score.  Any
+ * failure prints a message and exits with status 1.
+ *
+ * \param lineno      source line of the call, written to the log to
+ *                    identify the query
+ * \param zh          open Zebra session to search in
+ * \param query       query in PQF notation
+ * \param exphits     expected number of hits; at most 10, the size of
+ *                    the local record buffer
+ * \param firstrec    text that must occur in the first record
+ * \param firstscore  score the first record must have
+ *
+ * When zero hits are expected and found there is no first record, so
+ * the record checks are skipped.
+ */
+void RankingQuery(int lineno, ZebraHandle zh, char *query,
+                  int exphits, char *firstrec, int firstscore )
+{
+    ZebraRetrievalRecord retrievalRecord[10];
+    const int maxrecs = (int) (sizeof(retrievalRecord) /
+                               sizeof(retrievalRecord[0]));
+    ODR odr_output = odr_createmem (ODR_DECODE);
+    ODR odr_input = odr_createmem (ODR_DECODE);
+    YAZ_PQF_Parser parser = yaz_pqf_create();
+    Z_RPNQuery *rpn = yaz_pqf_parse(parser, odr_input, query);
+    const char *setname="rsetname";
+    int hits;
+    int rc;
+    int i;
+
+    logf(LOG_LOG,"======================================");
+    logf(LOG_LOG,"qry[%d]: %s", lineno, query);
+
+    if (!rpn) {
+        printf("Error: Parse failed \n%s\n",query);
+        exit(1);
+    }
+    rc=zebra_search_RPN (zh, odr_input, rpn, setname, &hits);
+    if (rc) {
+        printf("Error: search returned %d \n%s\n",rc,query);
+        exit (1);
+    }
+
+    if (hits != exphits) {
+        printf("Error: search returned %d hits instead of %d\n",
+               hits, exphits);
+        exit (1);
+    }
+    yaz_pqf_destroy(parser);
+
+    if (hits < 1) {
+        /* nothing was expected and nothing was found: there is no */
+        /* first record to inspect */
+        odr_destroy (odr_output);
+        odr_destroy (odr_input);
+        return;
+    }
+    if (hits > maxrecs) {
+        /* retrieving more records than the buffer holds would write */
+        /* past the end of retrievalRecord */
+        printf("Error: %d hits do not fit in the %d-record buffer\n",
+               hits, maxrecs);
+        exit (1);
+    }
+
+    for (i = 0; i < maxrecs; i++)
+    {
+        retrievalRecord[i].position = i+1;
+        /* preset an out-of-range score as a sentinel */
+        retrievalRecord[i].score = i+20000;
+    }
+
+    rc=zebra_records_retrieve (zh, odr_output, setname, 0,
+                               VAL_TEXT_XML, hits, retrievalRecord);
+
+    if (rc) {
+        printf("Error: retrieve returned %d \n%s\n",rc,query);
+        exit (1);
+    }
+
+    if (!retrievalRecord[0].buf)
+    {
+        /* strstr() below must not be handed a null pointer */
+        printf("Error: first record has no content\n%s\n",query);
+        exit(1);
+    }
+
+    if (!strstr(retrievalRecord[0].buf, firstrec))
+    {
+        printf("Error: Got the wrong record first\n");
+        printf("Expected '%s' but got \n",firstrec);
+        printf("%.*s\n",retrievalRecord[0].len,retrievalRecord[0].buf);
+        exit(1);
+    }
+
+    if (retrievalRecord[0].score != firstscore)
+    {
+        printf("Error: first rec got score %d instead of %d\n",
+               retrievalRecord[0].score, firstscore);
+        exit(1);
+    }
+    odr_destroy (odr_output);
+    odr_destroy (odr_input);
+}
+