2 * Copyright (C) 1994-2002, Index Data
4 * Sebastian Hammer, Adam Dickmeiss
6 * $Id: zsets.c,v 1.33 2002-03-20 20:24:30 adam Exp $
19 #define SORT_IDX_ENTRYSIZE 64
20 #define ZSET_SORT_MAX_LEVEL 3
22 struct zebra_set_term_entry {
37 struct zset_sort_info *sort_info;
38 struct zebra_set_term_entry *term_entries;
40 struct zebra_set *next;
44 struct zset_sort_entry {
47 char buf[ZSET_SORT_MAX_LEVEL][SORT_IDX_ENTRYSIZE];
50 struct zset_sort_info {
53 struct zset_sort_entry *all_entries;
54 struct zset_sort_entry **entries;
57 ZebraSet resultSetAddRPN (ZebraHandle zh, ODR input, ODR output,
58 Z_RPNQuery *rpn, int num_bases, char **basenames,
67 zebraSet = resultSetAdd (zh, setname, 1);
72 zebraSet->num_bases = num_bases;
73 zebraSet->basenames = basenames;
74 zebraSet->nmem = odr_extract_mem (input);
76 zebraSet->rset = rpn_search (zh, output->mem, rpn,
78 zebraSet->basenames, zebraSet->name,
80 zh->hits = zebraSet->hits;
87 void resultSetAddTerm (ZebraHandle zh, ZebraSet s, int reg_type,
88 const char *db, int set,
89 int use, const char *term)
92 s->nmem = nmem_create ();
96 s->term_entries_max = 1000;
98 nmem_malloc (s->nmem, s->term_entries_max *
99 sizeof(*s->term_entries));
100 for (i = 0; i < s->term_entries_max; i++)
101 s->term_entries[i].term = 0;
103 if (s->hits < s->term_entries_max)
105 s->term_entries[s->hits].reg_type = reg_type;
106 s->term_entries[s->hits].db = nmem_strdup (s->nmem, db);
107 s->term_entries[s->hits].set = set;
108 s->term_entries[s->hits].use = use;
109 s->term_entries[s->hits].term = nmem_strdup (s->nmem, term);
115 const char *zebra_resultSetTerms (ZebraHandle zh, const char *setname,
116 int no, int *count, int *no_max)
118 ZebraSet s = resultSetGet (zh, setname);
124 *no_max = s->rset->no_rset_terms;
125 if (no < 0 || no >= *no_max)
127 *count = s->rset->rset_terms[no]->count;
128 return s->rset->rset_terms[no]->name;
132 ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov)
137 for (s = zh->sets; s; s = s->next)
138 if (!strcmp (s->name, name))
142 logf (LOG_DEBUG, "updating result set %s", name);
143 if (!ov || s->locked)
146 rset_delete (s->rset);
148 nmem_destroy (s->nmem);
152 logf (LOG_DEBUG, "adding result set %s", name);
153 s = (ZebraSet) xmalloc (sizeof(*s));
156 s->name = (char *) xmalloc (strlen(name)+1);
157 strcpy (s->name, name);
159 s->sort_info = (struct zset_sort_info *)
160 xmalloc (sizeof(*s->sort_info));
161 s->sort_info->max_entries = 1000;
162 s->sort_info->entries = (struct zset_sort_entry **)
163 xmalloc (sizeof(*s->sort_info->entries) *
164 s->sort_info->max_entries);
165 s->sort_info->all_entries = (struct zset_sort_entry *)
166 xmalloc (sizeof(*s->sort_info->all_entries) *
167 s->sort_info->max_entries);
168 for (i = 0; i < s->sort_info->max_entries; i++)
169 s->sort_info->entries[i] = s->sort_info->all_entries + i;
180 ZebraSet resultSetGet (ZebraHandle zh, const char *name)
184 for (s = zh->sets; s; s = s->next)
185 if (!strcmp (s->name, name))
187 if (!s->term_entries && !s->rset && s->rpn)
189 NMEM nmem = nmem_create ();
190 yaz_log (LOG_LOG, "research %s", name);
192 rpn_search (zh, nmem, s->rpn, s->num_bases,
193 s->basenames, s->name, s);
201 void resultSetInvalidate (ZebraHandle zh)
203 ZebraSet s = zh->sets;
205 for (; s; s = s->next)
208 rset_delete (s->rset);
213 void resultSetDestroy (ZebraHandle zh, int num, char **names,int *statuses)
215 ZebraSet * ss = &zh->sets;
219 for (i = 0; i<num; i++)
220 statuses[i] = Z_DeleteStatus_resultSetDidNotExist;
222 zh->errString = NULL;
229 for (i = 0; i<num; i++)
230 if (!strcmp (s->name, names[i]))
233 statuses[i] = Z_DeleteStatus_success;
242 xfree (s->sort_info->all_entries);
243 xfree (s->sort_info->entries);
244 xfree (s->sort_info);
247 nmem_destroy (s->nmem);
249 rset_delete (s->rset);
258 ZebraPosSet zebraPosSetCreate (ZebraHandle zh, const char *name,
259 int num, int *positions)
265 struct zset_sort_info *sort_info;
267 if (!(sset = resultSetGet (zh, name)))
269 if (!(rset = sset->rset))
271 if (!sset->term_entries)
273 sr = (ZebraPosSet) xmalloc (sizeof(*sr) * num);
274 for (i = 0; i<num; i++)
276 struct zebra_set_term_entry *entry = sset->term_entries;
283 if (positions[i] <= sset->term_entries_max)
285 sr[i].term = sset->term_entries[positions[i]-1].term;
286 sr[i].db = sset->term_entries[positions[i]-1].db;
292 sr = (ZebraPosSet) xmalloc (sizeof(*sr) * num);
293 for (i = 0; i<num; i++)
300 sort_info = sset->sort_info;
305 for (i = 0; i<num; i++)
307 position = positions[i];
308 if (position > 0 && position <= sort_info->num_entries)
310 logf (LOG_DEBUG, "got pos=%d (sorted)", position);
311 sr[i].sysno = sort_info->entries[position-1]->sysno;
312 sr[i].score = sort_info->entries[position-1]->score;
316 /* did we really get all entries using sort ? */
317 for (i = 0; i<num; i++)
322 if (i < num) /* nope, get the rest, unsorted - sorry */
332 position = sort_info->num_entries;
333 while (num_i < num && positions[num_i] < position)
335 rfd = rset_open (rset, RSETF_READ);
336 while (num_i < num && rset_read (rset, rfd, &key, &term_index))
338 if (key.sysno != psysno)
343 /* determine whether we already have this in our set */
344 for (i = sort_info->num_entries; --i >= 0; )
345 if (psysno == sort_info->entries[i]->sysno)
351 assert (num_i < num);
352 if (position == positions[num_i])
354 sr[num_i].sysno = psysno;
355 logf (LOG_DEBUG, "got pos=%d (unsorted)", position);
356 sr[num_i].score = -1;
361 rset_close (rset, rfd);
367 void zebraPosSetDestroy (ZebraHandle zh, ZebraPosSet records, int num)
378 void resultSetInsertSort (ZebraHandle zh, ZebraSet sset,
379 struct sortKeyInfo *criteria, int num_criteria,
382 struct zset_sort_entry this_entry;
383 struct zset_sort_entry *new_entry = NULL;
384 struct zset_sort_info *sort_info = sset->sort_info;
387 sortIdx_sysno (zh->service->sortIdx, sysno);
388 for (i = 0; i<num_criteria; i++)
390 sortIdx_type (zh->service->sortIdx, criteria[i].attrUse);
391 sortIdx_read (zh->service->sortIdx, this_entry.buf[i]);
393 i = sort_info->num_entries;
397 for (j = 0; j<num_criteria; j++)
399 if (criteria[j].numerical)
401 double diff = atof(this_entry.buf[j]) -
402 atof(sort_info->entries[i]->buf[j]);
411 rel = memcmp (this_entry.buf[j], sort_info->entries[i]->buf[j],
419 if (criteria[j].relation == 'A')
424 else if (criteria[j].relation == 'D')
431 j = sort_info->max_entries;
435 if (sort_info->num_entries == j)
438 j = (sort_info->num_entries)++;
439 new_entry = sort_info->entries[j];
442 sort_info->entries[j] = sort_info->entries[j-1];
445 sort_info->entries[i] = new_entry;
447 for (i = 0; i<num_criteria; i++)
448 memcpy (new_entry->buf[i], this_entry.buf[i], SORT_IDX_ENTRYSIZE);
449 new_entry->sysno = sysno;
450 new_entry->score = -1;
453 void resultSetInsertRank (ZebraHandle zh, struct zset_sort_info *sort_info,
454 int sysno, int score, int relation)
456 struct zset_sort_entry *new_entry = NULL;
459 i = sort_info->num_entries;
464 rel = score - sort_info->entries[i]->score;
471 else if (relation == 'A')
478 j = sort_info->max_entries;
482 if (sort_info->num_entries == j)
485 j = (sort_info->num_entries)++;
487 new_entry = sort_info->entries[j];
490 sort_info->entries[j] = sort_info->entries[j-1];
493 sort_info->entries[i] = new_entry;
495 new_entry->sysno = sysno;
496 new_entry->score = score;
499 void resultSetSort (ZebraHandle zh, NMEM nmem,
500 int num_input_setnames, const char **input_setnames,
501 const char *output_setname,
502 Z_SortKeySpecList *sort_sequence, int *sort_status)
507 if (num_input_setnames == 0)
512 if (num_input_setnames > 1)
517 logf (LOG_DEBUG, "result set sort input=%s output=%s",
518 *input_setnames, output_setname);
519 sset = resultSetGet (zh, input_setnames[0]);
523 zh->errString = nmem_strdup (nmem, input_setnames[0]);
526 if (!(rset = sset->rset))
529 zh->errString = nmem_strdup (nmem, input_setnames[0]);
532 if (strcmp (output_setname, input_setnames[0]))
534 rset = rset_dup (rset);
535 sset = resultSetAdd (zh, output_setname, 1);
538 resultSetSortSingle (zh, nmem, sset, rset, sort_sequence, sort_status);
541 void resultSetSortSingle (ZebraHandle zh, NMEM nmem,
542 ZebraSet sset, RSET rset,
543 Z_SortKeySpecList *sort_sequence, int *sort_status)
547 struct sortKeyInfo sort_criteria[3];
552 logf (LOG_LOG, "resultSetSortSingle start");
553 sset->sort_info->num_entries = 0;
556 num_criteria = sort_sequence->num_specs;
557 if (num_criteria > 3)
559 for (i = 0; i < num_criteria; i++)
561 Z_SortKeySpec *sks = sort_sequence->specs[i];
564 if (*sks->sortRelation == Z_SortRelation_ascending)
565 sort_criteria[i].relation = 'A';
566 else if (*sks->sortRelation == Z_SortRelation_descending)
567 sort_criteria[i].relation = 'D';
573 if (sks->sortElement->which == Z_SortElement_databaseSpecific)
578 else if (sks->sortElement->which != Z_SortElement_generic)
583 sk = sks->sortElement->u.generic;
586 case Z_SortKey_sortField:
587 logf (LOG_DEBUG, "Sort: key %d is of type sortField", i+1);
590 case Z_SortKey_elementSpec:
591 logf (LOG_DEBUG, "Sort: key %d is of type elementSpec", i+1);
594 case Z_SortKey_sortAttributes:
595 logf (LOG_DEBUG, "Sort: key %d is of type sortAttributes", i+1);
596 sort_criteria[i].attrUse =
597 zebra_maps_sort (zh->service->zebra_maps,
598 sk->u.sortAttributes,
599 &sort_criteria[i].numerical);
600 logf (LOG_DEBUG, "use value = %d", sort_criteria[i].attrUse);
601 if (sort_criteria[i].attrUse == -1)
606 if (sortIdx_type (zh->service->sortIdx, sort_criteria[i].attrUse))
614 rfd = rset_open (rset, RSETF_READ);
615 while (rset_read (rset, rfd, &key, &term_index))
617 if (key.sysno != psysno)
621 resultSetInsertSort (zh, sset,
622 sort_criteria, num_criteria, psysno);
625 rset_close (rset, rfd);
627 for (i = 0; i < rset->no_rset_terms; i++)
628 yaz_log (LOG_LOG, "term=\"%s\" nn=%d type=%s count=%d",
629 rset->rset_terms[i]->name,
630 rset->rset_terms[i]->nn,
631 rset->rset_terms[i]->flags,
632 rset->rset_terms[i]->count);
634 *sort_status = Z_SortStatus_success;
635 logf (LOG_LOG, "resultSetSortSingle end");
638 RSET resultSetRef (ZebraHandle zh, Z_ResultSetId *resultSetId)
642 if ((s = resultSetGet (zh, resultSetId)))
647 void resultSetRank (ZebraHandle zh, ZebraSet zebraSet, RSET rset)
653 ZebraRankClass rank_class;
654 struct rank_control *rc;
655 struct zset_sort_info *sort_info;
657 sort_info = zebraSet->sort_info;
658 sort_info->num_entries = 0;
660 rfd = rset_open (rset, RSETF_READ);
662 yaz_log (LOG_LOG, "resultSetRank");
664 rank_class = zebraRankLookup (zh, "rank-1");
665 rc = rank_class->control;
667 if (rset_read (rset, rfd, &key, &term_index))
669 int psysno = key.sysno;
672 (*rc->begin) (zh, rank_class->class_handle, rset);
677 if (key.sysno != psysno)
679 score = (*rc->calc) (handle, psysno);
681 resultSetInsertRank (zh, sort_info, psysno, score, 'A');
685 (*rc->add) (handle, key.seqno, term_index);
687 while (rset_read (rset, rfd, &key, &term_index));
688 score = (*rc->calc) (handle, psysno);
689 resultSetInsertRank (zh, sort_info, psysno, score, 'A');
690 (*rc->end) (zh, handle);
692 rset_close (rset, rfd);
694 for (i = 0; i < rset->no_rset_terms; i++)
695 yaz_log (LOG_LOG, "term=\"%s\" nn=%d type=%s count=%d",
696 rset->rset_terms[i]->name,
697 rset->rset_terms[i]->nn,
698 rset->rset_terms[i]->flags,
699 rset->rset_terms[i]->count);
701 yaz_log (LOG_LOG, "%d keys, %d distinct sysnos", kno, zebraSet->hits);
704 ZebraRankClass zebraRankLookup (ZebraHandle zh, const char *name)
706 ZebraRankClass p = zh->service->rank_classes;
707 while (p && strcmp (p->control->name, name))
709 if (p && !p->init_flag)
711 if (p->control->create)
712 p->class_handle = (*p->control->create)(zh->service);
718 void zebraRankInstall (ZebraService zh, struct rank_control *ctrl)
720 ZebraRankClass p = (ZebraRankClass) xmalloc (sizeof(*p));
721 p->control = (struct rank_control *) xmalloc (sizeof(*p->control));
722 memcpy (p->control, ctrl, sizeof(*p->control));
723 p->control->name = xstrdup (ctrl->name);
725 p->next = zh->rank_classes;
726 zh->rank_classes = p;
729 void zebraRankDestroy (ZebraService zh)
731 ZebraRankClass p = zh->rank_classes;
734 ZebraRankClass p_next = p->next;
735 if (p->init_flag && p->control->destroy)
736 (*p->control->destroy)(zh, p->class_handle);
737 xfree (p->control->name);
742 zh->rank_classes = NULL;