1 /* $Id: rsbetween.c,v 1.30 2004-11-05 17:44:32 heikki Exp $
2 Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
24 /* rsbetween is (mostly) used for XML searches. It returns the hits of the
25 * "middle" rset that are in between the "left" and "right" rsets. For
26 * example, "Shakespeare" in between "<author>" and "</author>". The thing is
27 * complicated by the inclusion of attributes (from their own rset). If attrs
28 * are specified, they must match the "left" rset (start tag), e.g. "Hamlet"
29 * between "<title lang=eng>" and "</title>". (This assumes that the attributes
30 * are indexed to the same seqno as the tags).
/* Forward declarations of the file-local rset callbacks that are defined
 * later in this file. */
43 static RSFD r_open (RSET ct, int flag);
44 static void r_close (RSFD rfd);
45 static void r_delete (RSET ct);
46 static int r_forward(RSFD rfd, void *buf,
47 TERMID *term, const void *untilbuf);
48 static int r_read(RSFD rfd, void *buf, TERMID *term );
49 static int r_write(RSFD rfd, const void *buf);
50 static void r_pos(RSFD rfd, double *current, double *total);
51 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm);
/* Control record for the "between" rset type; rsbetween_create() hands its
 * address to rset_create_base(). */
53 static const struct rset_control control =
/* Non-static pointer to the control record above. */
67 const struct rset_control *rset_kind_between = &control;
/* Private data of a "between" rset; the callbacks in this file read it back
 * from ct->priv (or rfd->rset->priv). */
74 struct rset_between_info {
75 RSET andset; /* multi-and rset built by rsmultiand_create() in rsbetween_create() */
76 TERMID startterm; /* pseudo terms for detecting which one we read from; r_read compares *term against them */
/* Per-handle state of an open "between" rset, kept in rfd->priv. The three
 * key buffers are each key_size bytes and are allocated in r_open(). */
81 struct rset_between_rfd {
83 void *recbuf; /* a key that tells which record we are in */
84 void *startbuf; /* the start tag */
85 int startbufok; /* we have seen the first start tag */
86 void *attrbuf; /* the attr tag. If these two match, we have attr match */
87 int attrbufok; /* we have seen the first attr tag, can compare */
88 int depth; /* number of start-tags without end-tags */
89 int attrdepth; /* on what depth the attr matched; 0 = no match, checkattr() uses -1 for "matches always" */
/* Log level used by every logf() call in this file; looked up with
 * yaz_log_module_level("rsbetween") in rsbetween_create(). */
93 static int log_level=0;
/* Set to 1 once log_level has been looked up, so the lookup happens once. */
94 static int log_level_initialized=0;
/* Create a "between" rset from four input rsets:
 *   rset_l    -> rsetarray[STARTTAG]  (start tag)
 *   rset_m    -> rsetarray[HIT]       (the hits to be filtered)
 *   rset_r    -> rsetarray[STOPTAG]   (stop tag)
 *   rset_attr -> rsetarray[ATTRTAG]   (attribute constraint)
 * nmem is used for the dummy terms and the multi-and rset; kcontrol and
 * scope are passed on to rset_create_base() and rsmultiand_create().
 * NOTE(review): presumably returns rnew - confirm. */
97 RSET rsbetween_create( NMEM nmem, const struct key_control *kcontrol,
99 RSET rset_l, RSET rset_m, RSET rset_r, RSET rset_attr)
101 RSET rnew=rset_create_base(&control, nmem, kcontrol, scope,0);
/* the private info record is allocated from the new rset's own nmem */
102 struct rset_between_info *info=
103 (struct rset_between_info *) nmem_malloc(rnew->nmem,sizeof(*info));
/* look up the module log level on the first call only */
107 if (!log_level_initialized)
109 log_level=yaz_log_module_level("rsbetween");
110 log_level_initialized=1;
/* collect the inputs in one array, indexed by role */
112 rsetarray[STARTTAG] = rset_l;
113 rsetarray[HIT] = rset_m;
114 rsetarray[STOPTAG] = rset_r;
115 rsetarray[ATTRTAG] = rset_attr;
117 /* make sure we have decent terms for all rsets. Create dummies if needed*/
118 if (!rsetarray[STARTTAG]->term)
120 rsetarray[STARTTAG]->term=
121 rset_term_create("<starttag>",strlen("<starttag>"),"",0,nmem);
122 rsetarray[STARTTAG]->term->rset=rsetarray[STARTTAG];
/* remembered so r_read() can recognise start-tag keys by comparing term pointers */
124 info->startterm=rsetarray[STARTTAG]->term;
126 if (!rsetarray[STOPTAG]->term)
128 rsetarray[STOPTAG]->term=
129 rset_term_create("<stoptag>",strlen("<stoptag>"),"",0,nmem);
130 rsetarray[STOPTAG]->term->rset=rsetarray[STOPTAG];
/* likewise for stop-tag keys */
132 info->stopterm=rsetarray[STOPTAG]->term;
/* NOTE(review): this dereferences rsetarray[ATTRTAG]; presumably only reached
 * when rset_attr is non-NULL - confirm the guard */
136 if (!rsetarray[ATTRTAG]->term)
138 rsetarray[ATTRTAG]->term=
139 rset_term_create("<attrtag>",strlen("<attrtag>"),"",0,nmem);
140 rsetarray[ATTRTAG]->term->rset=rsetarray[ATTRTAG];
/* likewise for attr keys; checkattr() also reads info->attrterm */
142 info->attrterm=rsetarray[ATTRTAG]->term;
/* NOTE(review): n is the rset count handed to rsmultiand_create() below;
 * presumably decremented when there is no attr rset - confirm */
147 n--; /* smaller and */
/* one multi-and over the first n entries of rsetarray; all reading goes through it */
149 info->andset=rsmultiand_create( nmem, kcontrol, scope, n, rsetarray);
151 logf(log_level,"create rset at %p",rnew);
/* Delete callback: logs the rset being deleted, then deletes the multi-and
 * rset stored in its private info. */
156 static void r_delete (RSET ct)
158 struct rset_between_info *info = (struct rset_between_info *) ct->priv;
159 logf(log_level,"delete rset at %p",ct);
160 rset_delete(info->andset);
/* Open callback: create a handle on the between rset ct.
 * A flag containing RSETF_WRITE is reported with LOG_FATAL, since this rset
 * type is read-only. The handle's private state gets three key-sized buffers
 * from ct->nmem and a read handle on the multi-and rset. */
164 static RSFD r_open (RSET ct, int flag)
166 struct rset_between_info *info = (struct rset_between_info *) ct->priv;
168 struct rset_between_rfd *p;
170 if (flag & RSETF_WRITE)
172 logf (LOG_FATAL, "between set type is read-only");
175 rfd=rfd_create_base(ct);
/* NOTE(review): p is either taken from rfd->priv or freshly allocated below;
 * presumably the allocation happens only when rfd->priv is empty - confirm */
177 p=(struct rset_between_rfd *)rfd->priv;
179 p = (struct rset_between_rfd *) nmem_malloc(ct->nmem, (sizeof(*p)));
/* each buffer holds exactly one key (key_size bytes) */
181 p->recbuf = nmem_malloc(ct->nmem, (ct->keycontrol->key_size));
182 p->startbuf = nmem_malloc(ct->nmem, (ct->keycontrol->key_size));
183 p->attrbuf = nmem_malloc(ct->nmem, (ct->keycontrol->key_size));
/* all reads and forwards on this handle go through the multi-and rset */
185 p->andrfd = rset_open (info->andset, RSETF_READ);
191 logf(log_level,"open rset=%p rfd=%p", ct, rfd);
/* Close callback: logs, closes the handle on the multi-and rset, then
 * releases the rfd itself with rfd_delete_base(). */
195 static void r_close (RSFD rfd)
197 struct rset_between_rfd *p=(struct rset_between_rfd *)rfd->priv;
198 logf(log_level,"close rfd=%p", rfd);
199 rset_close (p->andrfd);
200 rfd_delete_base(rfd);
/* Forward callback: logs, then passes buf, term and untilbuf straight to
 * rset_forward() on the multi-and handle and stores its result in rc.
 * NOTE(review): presumably rc is what gets returned - confirm. */
205 static int r_forward(RSFD rfd, void *buf,
206 TERMID *term, const void *untilbuf)
208 struct rset_between_rfd *p=(struct rset_between_rfd *)rfd->priv;
210 logf(log_level, "forwarding ");
211 rc=rset_forward(p->andrfd,buf,term,untilbuf);
/* Update p->attrdepth for the handle rfd: when both a start-tag key and an
 * attr key have been stored and they compare equal, the attribute match is
 * recorded at the current depth. Called from r_read() after a start-tag or
 * attr key has been copied into its buffer. */
217 static void checkattr(RSFD rfd)
219 struct rset_between_info *info =(struct rset_between_info *)rfd->rset->priv;
220 struct rset_between_rfd *p=(struct rset_between_rfd *)rfd->priv;
221 const struct key_control *kctrl=rfd->rset->keycontrol;
/* NOTE(review): the conditions guarding the early return and the -1
 * assignment below should be confirmed (presumably "attrdepth already set"
 * and "no attr term configured" respectively) */
224 return; /* already found one */
227 p->attrdepth=-1; /* matches always */
/* compare only once both a start-tag key and an attr key have been seen */
230 if ( p->startbufok && p->attrbufok )
231 { /* have buffers to compare */
232 cmp=(kctrl->cmp)(p->startbuf,p->attrbuf);
233 if (0==cmp) /* and the keys match */
/* remember the depth so the matching end tag can clear it in r_read() */
235 p->attrdepth=p->depth;
236 logf(log_level, "found attribute match at depth %d",p->attrdepth);
/* Read callback: pull keys from the multi-and rset until one qualifies as a
 * hit. Keys whose term is the start, stop or attr pseudo term only update
 * the state in p; a key with any other term is returned (1, with buf and
 * *term as filled in by rset_read) when both p->depth and p->attrdepth are
 * non-zero, and is otherwise logged as ignored and the loop continues. */
242 static int r_read (RSFD rfd, void *buf, TERMID *term)
244 struct rset_between_info *info =(struct rset_between_info *)rfd->rset->priv;
245 struct rset_between_rfd *p=(struct rset_between_rfd *)rfd->priv;
246 const struct key_control *kctrl=rfd->rset->keycontrol;
248 logf(log_level,"== read: term=%p",term);
252 while ( rset_read(p->andrfd,buf,term) )
254 logf(log_level,"read loop term=%p d=%d ad=%d",
255 *term,p->depth, p->attrdepth);
/* NOTE(review): presumably the first-key case - recbuf is seeded with the
 * key and cmp is forced to the scope value; confirm the guard */
258 memcpy(p->recbuf,buf,kctrl->key_size);
260 cmp=rfd->rset->scope; /* force newrecord */
/* otherwise compare the new key with the key of the current record */
263 cmp=(kctrl->cmp)(buf,p->recbuf);
264 logf(log_level, "cmp=%d",cmp);
/* a difference at or above the rset's scope is treated as a new record */
267 if (cmp>=rfd->rset->scope)
269 logf(log_level,"new record");
272 memcpy(p->recbuf,buf,kctrl->key_size);
276 logf(log_level," term: '%s'", (*term)->name);
/* dispatch on which pseudo term the key came from (pointer comparison) */
277 if (*term==info->startterm)
280 logf(log_level,"read start tag. d=%d",p->depth);
/* keep the start-tag key so checkattr() can compare it with the attr key */
281 memcpy(p->startbuf,buf,kctrl->key_size);
283 checkattr(rfd); /* in case we already saw the attr here */
285 else if (*term==info->stopterm)
287 if (p->depth == p->attrdepth)
288 p->attrdepth=0; /* ending the tag with attr match */
290 logf(log_level,"read end tag. d=%d ad=%d",p->depth, p->attrdepth);
292 else if (*term==info->attrterm)
294 logf(log_level,"read attr");
/* keep the attr key so checkattr() can compare it with the start-tag key */
295 memcpy(p->attrbuf,buf,kctrl->key_size);
297 checkattr(rfd); /* in case the start tag came first */
300 { /* must be a real hit */
/* a hit counts only inside an open tag (depth != 0) with an attribute
 * match in effect (attrdepth != 0, which includes the -1 "always" value) */
301 if (p->depth && p->attrdepth)
304 logf(log_level,"got a hit h="ZINT_FORMAT" d=%d ad=%d",
305 p->hits,p->depth,p->attrdepth);
306 return 1; /* we have everything in place already! */
308 logf(log_level, "Ignoring hit. h="ZINT_FORMAT" d=%d ad=%d",
309 p->hits,p->depth,p->attrdepth);
/* Write callback: writing is not supported for this rset type, so the call
 * is only reported with LOG_FATAL. */
318 static int r_write (RSFD rfd, const void *buf)
320 logf (LOG_FATAL, "between set type is read-only");
/* Position callback: lets rset_pos() on the multi-and handle fill in
 * *current and *total, then logs both values. */
325 static void r_pos (RSFD rfd, double *current, double *total)
327 struct rset_between_rfd *p=(struct rset_between_rfd *)rfd->priv;
328 rset_pos(p->andrfd,current, total);
329 logf(log_level,"pos: %0.1f/%0.1f ", *current, *total);
/* Term-listing callback: passes terms, maxterms and curterm unchanged to
 * rset_getterms() on the multi-and rset. */
332 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm)
334 struct rset_between_info *info = (struct rset_between_info *) ct->priv;
335 rset_getterms(info->andset, terms, maxterms, curterm);