1 /* This file is part of the Zebra server.
2 Copyright (C) 1995-2008 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
31 #include <yaz/diagbib1.h>
/* Upper bound on the number of sort criteria: resultSetSortSingle sizes its
   per-criterion arrays with it and clamps num_criteria to it. */
34 #define ZSET_SORT_MAX_LEVEL 10
/* One term record attached to a result set; resultSetAddTerm fills in its
   reg_type, db, index_name and term members. */
36 struct zebra_set_term_entry {
47 NMEM rset_nmem; /* for creating the rsets in */
/* database names of the set (copied into the set's nmem by resultSetAddRPN) */
50 const char **basenames;
/* sort specification; resultSetGet uses it to re-sort after a re-search */
52 Z_SortKeySpecList *sortSpec;
/* sorted/ranked entries of the set (allocated in resultSetAdd) */
53 struct zset_sort_info *sort_info;
/* term records; allocated on first use by resultSetAddTerm */
54 struct zebra_set_term_entry *term_entries;
/* next set in the handle's list (zh->sets) */
56 struct zebra_set *next;
/* set to 1 by resultSetRank when it stops counting exactly; reported to the
   caller of resultSetAddRPN */
58 int estimated_hit_count;
60 zint cache_position; /* last position */
61 RSFD cache_rfd; /* rfd (NULL if not existing) */
62 zint cache_psysno; /* sysno for last position */
63 zint approx_limit; /* limit before we do approx */
/* One slot of a sorted list; the code in this file reads and writes its
   sysno and score members. */
66 struct zset_sort_entry {
/* Sorted-entry bookkeeping for one result set.  resultSetAdd allocates
   max_entries slots in all_entries and points entries[i] at all_entries + i;
   num_entries counts the slots currently in use. */
71 struct zset_sort_info {
/* backing storage for the slots */
74 struct zset_sort_entry *all_entries;
/* slot pointers in sorted order (reordered by the insert functions) */
75 struct zset_sort_entry **entries;
/* Cached yaz log levels for this module.  The sort/searchhits/searchterms/
   resultsets values are assigned in loglevels(). */
/* NOTE(review): log_level_set looks like an "already initialised" flag for
   loglevels() -- confirm against its full body. */
78 static int log_level_set=0;
79 static int log_level_sort=0;
80 static int log_level_searchhits=0;
81 static int log_level_searchterms=0;
82 static int log_level_resultsets=0;
/* Look up the yaz log levels of the "sorting", "searchhits", "searchterms"
   and "resultsets" log modules and cache them in the file-scope variables. */
84 static void loglevels(void)
88 log_level_sort = yaz_log_module_level("sorting");
89 log_level_searchhits = yaz_log_module_level("searchhits");
90 log_level_searchterms = yaz_log_module_level("searchterms");
91 log_level_resultsets = yaz_log_module_level("resultsets");
/* Execute the RPN query rpn for result set sset.
   A Z_SortKeySpecList with room for 10 (initially null) specs is allocated
   from nmem, the query's approximation limit is fetched into
   sset->approx_limit, and rpn_search_top is run against the set's databases.
   Afterwards the leading non-null specs are counted into num_specs, the
   rset's hits_limit is taken from sset->approx_limit, and the rset is passed
   on to resultSetRank / resultSetSortSingle.
   The ZEBRA_RES of those calls is kept in res. */
96 static ZEBRA_RES resultSetSearch(ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
97 Z_RPNQuery *rpn, ZebraSet sset)
100 Z_SortKeySpecList *sort_sequence;
102 ZEBRA_RES res = ZEBRA_OK;
104 sort_sequence = (Z_SortKeySpecList *)
105 nmem_malloc(nmem, sizeof(*sort_sequence));
106 sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
107 sort_sequence->specs = (Z_SortKeySpec **)
108 nmem_malloc(nmem, sort_sequence->num_specs *
109 sizeof(*sort_sequence->specs));
/* clear every slot so the used ones can be counted after the search */
110 for (i = 0; i<sort_sequence->num_specs; i++)
111 sort_sequence->specs[i] = 0;
113 rpn_get_top_approx_limit(zh, rpn->RPNStructure, &sset->approx_limit);
115 res = rpn_search_top(zh, rpn->RPNStructure, rpn->attributeSetId,
118 sset->num_bases, sset->basenames,
/* count the specs that are now non-null (presumably filled in by
   rpn_search_top).
   NOTE(review): this scan has no upper bound on i; it relies on at least
   one of the 10 slots remaining null. */
125 for (i = 0; sort_sequence->specs[i]; i++)
127 sort_sequence->num_specs = i;
128 rset->hits_limit = sset->approx_limit;
131 res = resultSetRank(zh, sset, rset, rset_nmem);
135 res = resultSetSortSingle(zh, nmem, sset, rset,
136 sort_sequence, &sort_status);
/* Create (or overwrite) the result set named setname and run an RPN query
   into it.
   The set is obtained with resultSetAdd(zh, setname, 1), marked locked while
   the search runs, given a fresh rset_nmem, and receives its own copies of
   the num_bases database names.  After resultSetSearch, *hits receives the
   set's hit count and *estimated_hit_count is 1 when the set flags its count
   as an estimate (it is reset to 0 on entry). */
143 ZEBRA_RES resultSetAddRPN(ZebraHandle zh, NMEM m, Z_RPNQuery *rpn,
144 int num_bases, char **basenames,
146 zint *hits, int *estimated_hit_count)
153 *estimated_hit_count = 0;
155 zebraSet = resultSetAdd(zh, setname, 1);
/* locked sets are refused by resultSetAdd's update check (!ov || s->locked) */
158 zebraSet->locked = 1;
161 zebraSet->rset_nmem = nmem_create();
163 zebraSet->num_bases = num_bases;
164 zebraSet->basenames =
165 nmem_malloc(zebraSet->nmem, num_bases * sizeof(*zebraSet->basenames));
166 for (i = 0; i<num_bases; i++)
167 zebraSet->basenames[i] = nmem_strdup(zebraSet->nmem, basenames[i]);
169 res = resultSetSearch(zh, zebraSet->nmem, zebraSet->rset_nmem,
171 *hits = zebraSet->hits;
172 if (zebraSet->estimated_hit_count)
173 *estimated_hit_count = 1;
177 zebraSet->locked = 0;
/* Record one term in result set s.
   The term table is allocated from s->nmem on first use with room for
   term_entries_max (1000) entries, all with a null term.  While s->hits is
   below term_entries_max, the entry at index s->hits receives reg_type and
   nmem copies of db, index_name and term. */
183 void resultSetAddTerm(ZebraHandle zh, ZebraSet s, int reg_type,
184 const char *db, const char *index_name,
187 assert(zh); /* compiler shut up */
189 s->nmem = nmem_create();
190 if (!s->term_entries)
193 s->term_entries_max = 1000;
195 nmem_malloc(s->nmem, s->term_entries_max *
196 sizeof(*s->term_entries));
197 for (i = 0; i < s->term_entries_max; i++)
198 s->term_entries[i].term = 0;
/* entries beyond the table capacity are not stored */
200 if (s->hits < s->term_entries_max)
202 s->term_entries[s->hits].reg_type = reg_type;
203 s->term_entries[s->hits].db = nmem_strdup(s->nmem, db);
204 s->term_entries[s->hits].index_name = nmem_strdup(s->nmem, index_name);
205 s->term_entries[s->hits].term = nmem_strdup(s->nmem, term);
/* Find or create the result set called name on handle zh.
   An existing set is updated: the request is checked against ov ("overwrite
   allowed") and the set's locked flag, and the set's cached rfd, rset and
   nmems are released.  Otherwise a new set is allocated, named, and given a
   sort_info whose capacity comes from the "sortmax" resource (default "1000",
   minimum 2), with entries[i] pointing at all_entries + i.
   In both cases cache_position and estimated_hit_count are reset and
   approx_limit is taken from the handle. */
210 ZebraSet resultSetAdd(ZebraHandle zh, const char *name, int ov)
215 for (s = zh->sets; s; s = s->next)
216 if (!strcmp(s->name, name))
223 yaz_log(log_level_resultsets, "updating result set %s", name);
/* NOTE(review): presumably bails out here when overwriting is not allowed or
   the set is locked -- confirm against the full body */
224 if (!ov || s->locked)
229 rset_close(s->cache_rfd);
230 rset_delete(s->rset);
233 nmem_destroy(s->rset_nmem);
235 nmem_destroy(s->nmem);
239 const char *sort_max_str = zebra_get_resource(zh, "sortmax", "1000");
241 yaz_log(log_level_resultsets, "adding result set %s", name);
242 s = (ZebraSet) xmalloc(sizeof(*s));
245 s->name = xstrdup(name);
247 s->sort_info = (struct zset_sort_info *)
248 xmalloc(sizeof(*s->sort_info));
249 s->sort_info->max_entries = atoi(sort_max_str);
/* never keep fewer than two sort slots */
250 if (s->sort_info->max_entries < 2)
251 s->sort_info->max_entries = 2;
253 s->sort_info->entries = (struct zset_sort_entry **)
254 xmalloc(sizeof(*s->sort_info->entries) *
255 s->sort_info->max_entries);
256 s->sort_info->all_entries = (struct zset_sort_entry *)
257 xmalloc(sizeof(*s->sort_info->all_entries) *
258 s->sort_info->max_entries);
/* initially the sorted order is just the storage order */
259 for (i = 0; i < s->sort_info->max_entries; i++)
260 s->sort_info->entries[i] = s->sort_info->all_entries + i;
270 s->cache_position = 0;
272 s->approx_limit = zh->approx_limit;
273 s->estimated_hit_count = 0;
/* Look up the result set called name on handle zh.
   If the set found has neither term entries nor an rset but still has its
   RPN query, the search is re-run ("research") using a fresh temporary nmem
   and a new rset_nmem; when that yields an rset and the set has a stored
   sort specification, the set is sorted again ("resort"). */
277 ZebraSet resultSetGet(ZebraHandle zh, const char *name)
281 for (s = zh->sets; s; s = s->next)
282 if (!strcmp(s->name, name))
284 if (!s->term_entries && !s->rset && s->rpn)
286 NMEM nmem = nmem_create();
287 yaz_log(log_level_resultsets, "research %s", name);
289 s->rset_nmem = nmem_create();
290 resultSetSearch(zh, nmem, s->rset_nmem, s->rpn, s);
291 if (s->rset && s->sortSpec)
294 yaz_log(log_level_resultsets, "resort %s", name);
295 resultSetSortSingle(zh, nmem, s, s->rset, s->sortSpec,
/* Report the database names of result set setname.
   *basenames and *num_bases are set to the set's own array and count; the
   array is not copied, so it remains owned by the set. */
305 ZEBRA_RES resultSetGetBaseNames(ZebraHandle zh, const char *setname,
306 const char ***basenames, int *num_bases)
308 ZebraSet sset = resultSetGet(zh, setname);
311 *basenames = sset->basenames;
312 *num_bases = sset->num_bases;
/* Invalidate every result set on handle zh: for each set the cached rfd is
   closed, the rset deleted, cache_position reset to 0 and rset_nmem
   destroyed.  The set records themselves stay in the list. */
317 void resultSetInvalidate(ZebraHandle zh)
319 ZebraSet s = zh->sets;
321 yaz_log(log_level_resultsets, "invalidating result sets");
322 for (; s; s = s->next)
327 rset_close(s->cache_rfd);
328 rset_delete(s->rset);
332 s->cache_position = 0;
334 nmem_destroy(s->rset_nmem);
/* Destroy result sets on handle zh.
   statuses[0..num-1] are first set to Z_DeleteStatus_resultSetDidNotExist;
   a set whose name equals names[i] gets statuses[i] set to
   Z_DeleteStatus_success.  Destroying a set frees its sort_info arrays, its
   nmem, its cached rfd, its rset and its rset_nmem. */
339 void resultSetDestroy(ZebraHandle zh, int num, char **names,int *statuses)
341 ZebraSet * ss = &zh->sets;
345 for (i = 0; i<num; i++)
346 statuses[i] = Z_DeleteStatus_resultSetDidNotExist;
353 for (i = 0; i<num; i++)
354 if (!strcmp(s->name, names[i]))
357 statuses[i] = Z_DeleteStatus_success;
366 xfree(s->sort_info->all_entries);
367 xfree(s->sort_info->entries);
371 nmem_destroy(s->nmem);
375 rset_close(s->cache_rfd);
376 rset_delete(s->rset);
379 nmem_destroy(s->rset_nmem);
/* Fetch meta records for a run of num positions of result set name.
   A positions array of num entries is built (starting with the local
   pos_small buffer, with xmalloc storage as the alternative) and handed to
   zebra_meta_records_create.
   NOTE(review): requests with num <= 0 or num > 10000 are special-cased,
   presumably rejected -- confirm against the full body. */
388 ZebraMetaRecord *zebra_meta_records_create_range(ZebraHandle zh,
393 zint *pos = pos_small;
397 if (num > 10000 || num <= 0)
401 pos = xmalloc(sizeof(*pos) * num);
403 for (i = 0; i<num; i++)
406 mr = zebra_meta_records_create(zh, name, num, pos);
/* Build an xmalloc'ed array of num ZebraMetaRecord for the given positions
   of result set name.
   Two ways of filling the array appear below:
   - from the set's term entries (term and db of entry positions[i]-1);
   - from the set's sort_info for positions within num_entries (sysno and
     score of entries[position-1], positions being 1-based), and for the
     remaining positions by reading the rset sequentially, skipping records
     already present in sort_info; those records get score -1.
   The sequential read position is remembered in the set's cache_position /
   cache_psysno / cache_rfd so a later call for higher positions can resume
   from there instead of re-opening the rset. */
413 ZebraMetaRecord *zebra_meta_records_create(ZebraHandle zh, const char *name,
414 int num, zint *positions)
417 ZebraMetaRecord *sr = 0;
420 struct zset_sort_info *sort_info;
/* index into key.mem holding the sysno; NOTE(review): presumably changed
   when zh->m_staticrank is set, as in resultSetRank -- confirm */
421 size_t sysno_mem_index = 0;
423 if (zh->m_staticrank)
428 if (!(sset = resultSetGet(zh, name)))
430 if (!(rset = sset->rset))
432 if (!sset->term_entries)
434 sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
435 for (i = 0; i<num; i++)
442 if (positions[i] <= sset->term_entries_max)
444 sr[i].term = sset->term_entries[positions[i]-1].term;
445 sr[i].db = sset->term_entries[positions[i]-1].db;
451 sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
452 for (i = 0; i<num; i++)
459 sort_info = sset->sort_info;
/* first serve every position that falls inside the sorted part */
464 for (i = 0; i<num; i++)
466 position = positions[i];
467 if (position > 0 && position <= sort_info->num_entries)
469 yaz_log(log_level_sort, "got pos=" ZINT_FORMAT
470 " (sorted)", position);
471 sr[i].sysno = sort_info->entries[position-1]->sysno;
472 sr[i].score = sort_info->entries[position-1]->score;
476 /* did we really get all entries using sort ? */
477 for (i = 0; i<num; i++)
482 if (i < num) /* nope, get the rest, unsorted - sorry */
491 position = sort_info->num_entries;
/* skip the requested positions already covered by the sorted part */
492 while (num_i < num && positions[num_i] <= position)
/* resume from the cached read position when the next wanted position lies
   beyond it */
495 if (sset->cache_rfd &&
496 num_i < num && positions[num_i] > sset->cache_position)
498 position = sset->cache_position;
499 rfd = sset->cache_rfd;
500 psysno = sset->cache_psysno;
505 rset_close(sset->cache_rfd);
506 rfd = rset_open(rset, RSETF_READ);
508 while (num_i < num && rset_read(rfd, &key, 0))
510 zint this_sys = key.mem[sysno_mem_index];
/* a change of sysno marks the start of a new record */
511 if (this_sys != psysno)
516 /* determine whether we already have this in our set */
517 for (i = sort_info->num_entries; --i >= 0; )
518 if (psysno == sort_info->entries[i]->sysno)
525 if (position == positions[num_i])
527 sr[num_i].sysno = psysno;
528 yaz_log(log_level_sort, "got pos=" ZINT_FORMAT " (unsorted)", position);
529 sr[num_i].score = -1;
/* remember where the sequential read stopped */
534 sset->cache_position = position;
535 sset->cache_psysno = psysno;
536 sset->cache_rfd = rfd;
/* Release a record array obtained from zebra_meta_records_create.
   zh is only asserted to be non-null; it is otherwise unused. */
542 void zebra_meta_records_destroy(ZebraHandle zh, ZebraMetaRecord *records,
545 assert(zh); /* compiler shut up about unused arg */
551 int *ord; /* array of ord for each database searched */
552 int *numerical; /* array of numerical-compare flags, one per database searched */
553 const char *index_type; /* index type, looked up from ord[0] in resultSetSortSingle */
/* Insert record sysno into the sorted list of sset according to criteria.

   Step 1: for each of the num_criteria criteria the record's sort key is
   read from zh->reg->sort_index (using the ord for database_no) into
   tmp_cmp_buf[i], which is zeroed first.  The data read into w is treated as
   a sequence of NUL-terminated values; each value is truncated to
   SORT_IDX_ENTRYSIZE-1 bytes, and for relation 'A' a value that compares
   lower (strcmp) than the one held replaces it, for 'D' a higher one does.
   A criterion whose ord is -1 is not read from the sort index.

   Step 2: the insert position i is searched starting from num_entries,
   comparing tmp_cmp_buf[j] with the stored key of each existing entry in
   cmp_buf[j]: numerically (atof on the untranslated terms) when the
   criterion is numerical for this database, otherwise with memcmp.

   Step 3: if the position equals max_entries the record is not inserted.
   Otherwise a free slot is taken (num_entries grows unless the list is
   already full), higher entries and their compare buffers are moved up by
   one, and the new entry is stored with the given sysno and score -1. */
556 void resultSetInsertSort(ZebraHandle zh, ZebraSet sset,
558 struct sortKeyInfo *criteria, int num_criteria,
560 char *cmp_buf[], char *tmp_cmp_buf[])
562 struct zset_sort_entry *new_entry = NULL;
563 struct zset_sort_info *sort_info = sset->sort_info;
565 WRBUF w = wrbuf_alloc();
/* position the sort index on this record */
567 zebra_sort_sysno(zh->reg->sort_index, sysno);
568 for (i = 0; i<num_criteria; i++)
570 char *this_entry_buf = tmp_cmp_buf[i];
571 memset(this_entry_buf, '\0', SORT_IDX_ENTRYSIZE);
573 if (criteria[i].ord[database_no] != -1)
575 yaz_log(log_level_sort, "pre zebra_sort_type ord is %d",
576 criteria[i].ord[database_no]);
577 zebra_sort_type(zh->reg->sort_index, criteria[i].ord[database_no]);
579 if (zebra_sort_read(zh->reg->sort_index, w))
/* walk the NUL-separated values held in w */
582 while (off != wrbuf_len(w))
584 size_t l = strlen(wrbuf_buf(w)+off);
585 assert(off < wrbuf_len(w));
/* keep room for the terminating NUL in the fixed-size buffer */
587 if (l >= SORT_IDX_ENTRYSIZE)
588 l = SORT_IDX_ENTRYSIZE-1;
591 memcpy(this_entry_buf, wrbuf_buf(w)+off, l);
592 this_entry_buf[l] = '\0';
594 else if (criteria[i].relation == 'A')
596 if (strcmp(wrbuf_buf(w)+off, this_entry_buf) < 0)
598 memcpy(this_entry_buf, wrbuf_buf(w)+off, l);
599 this_entry_buf[l] = '\0';
602 else if (criteria[i].relation == 'D')
604 if (strcmp(wrbuf_buf(w)+off, this_entry_buf) > 0)
606 memcpy(this_entry_buf, wrbuf_buf(w)+off, l);
607 this_entry_buf[l] = '\0';
/* advance past this value and its NUL */
610 off += 1 + strlen(wrbuf_buf(w)+off);
616 yaz_log(log_level_sort, "criteria[i].ord is -1 so not reading from sort index");
620 i = sort_info->num_entries;
624 for (j = 0; j<num_criteria; j++)
626 char *this_entry_buf = tmp_cmp_buf[j];
627 char *other_entry_buf =
628 cmp_buf[j] + i * SORT_IDX_ENTRYSIZE;
629 if (criteria[j].numerical[database_no])
631 char this_entry_org[1024];
632 char other_entry_org[1024];
634 /* when searching multiple databases, we use the index
635 type of the first one. So if they differ between
636 databases, we have a problem here we could store the
637 index_type for each database, but if we didn't find the
638 record in any sort index, then we still don't know to
639 which database it belongs. */
640 const char *index_type = criteria[j].index_type;
641 zebra_term_untrans(zh, index_type, this_entry_org,
643 zebra_term_untrans(zh, index_type, other_entry_org,
645 diff = atof(this_entry_org) - atof(other_entry_org);
656 rel = memcmp(this_entry_buf, other_entry_buf,
659 /* when the compare is equal, continue to next criteria,
666 if (criteria[j].relation == 'A')
671 else if (criteria[j].relation == 'D')
678 yaz_log(log_level_sort, "ok, we want to insert record at position %d",i);
679 j = sort_info->max_entries;
681 yaz_log(log_level_sort, "sort_info->max_entries reached (%d) abort sort",j);
685 if (sort_info->num_entries == j)
688 j = (sort_info->num_entries)++;
689 new_entry = sort_info->entries[j];
690 /* move up all higher entries (to make room) */
694 for (k = 0; k<num_criteria; k++)
696 char *j_buf = cmp_buf[k] + j * SORT_IDX_ENTRYSIZE;
697 char *j_1_buf = cmp_buf[k] + (j-1) * SORT_IDX_ENTRYSIZE;
698 memcpy(j_buf, j_1_buf, SORT_IDX_ENTRYSIZE);
700 sort_info->entries[j] = sort_info->entries[j-1];
703 /* and insert the new entry at the correct place */
704 sort_info->entries[i] = new_entry;
706 /* and add this to the compare buffer */
707 for (i = 0; i<num_criteria; i++)
709 char *new_entry_buf = cmp_buf[i] + j * SORT_IDX_ENTRYSIZE;
710 char *this_entry_buf = tmp_cmp_buf[i];
711 memcpy(new_entry_buf, this_entry_buf, SORT_IDX_ENTRYSIZE);
713 new_entry->sysno = sysno;
714 new_entry->score = -1;
/* Insert (sysno, score) into the ranked list sort_info.
   The insert position is searched from num_entries using the difference
   between score and the score of existing entries, interpreted according to
   relation (the callers in this file pass 'A').  A slot is then taken
   (num_entries grows unless the list is already at max_entries), higher
   entries are shifted up by one, and the slot is stored at position i with
   the given sysno and score.  zh is only asserted to be non-null. */
717 void resultSetInsertRank(ZebraHandle zh, struct zset_sort_info *sort_info,
718 zint sysno, int score, int relation)
720 struct zset_sort_entry *new_entry = NULL;
722 assert(zh); /* compiler shut up about unused arg */
724 i = sort_info->num_entries;
729 rel = score - sort_info->entries[i]->score;
736 else if (relation == 'A')
743 j = sort_info->max_entries;
/* list already full: no growth */
747 if (sort_info->num_entries == j)
750 j = (sort_info->num_entries)++;
752 new_entry = sort_info->entries[j];
/* shift higher entries up to make room */
755 sort_info->entries[j] = sort_info->entries[j-1];
758 sort_info->entries[i] = new_entry;
760 new_entry->sysno = sysno;
761 new_entry->score = score;
/* Deep-copy an RPN query by an ODR round trip: src is encoded into a memory
   ODR, the resulting buffer is decoded into dst, and the decoder's memory is
   transferred to nmem so the copy lives as long as nmem does. */
764 static Z_RPNQuery *copy_RPNQuery(Z_RPNQuery *src, NMEM nmem)
767 ODR encode = odr_createmem(ODR_ENCODE);
768 ODR decode = odr_createmem(ODR_DECODE);
/* decoding is only attempted when encoding succeeded */
770 if (z_RPNQuery(encode, &src, 0, 0))
773 char *buf = odr_getbuf(encode, &len, 0);
777 odr_setbuf(decode, buf, len, 0);
778 z_RPNQuery(decode, &dst, 0, 0);
781 nmem_transfer(nmem, decode->mem);
/* Deep-copy a sort key specification list by an ODR round trip: src is
   encoded into a memory ODR, the buffer is decoded into dst (0 if nothing was
   decoded), and the decoder's memory is transferred to nmem. */
787 static Z_SortKeySpecList *copy_SortKeySpecList(Z_SortKeySpecList *src, NMEM nmem)
789 Z_SortKeySpecList *dst = 0;
790 ODR encode = odr_createmem(ODR_ENCODE);
791 ODR decode = odr_createmem(ODR_DECODE);
/* decoding is only attempted when encoding succeeded */
793 if (z_SortKeySpecList(encode, &src, 0, 0))
796 char *buf = odr_getbuf(encode, &len, 0);
800 odr_setbuf(decode, buf, len, 0);
801 z_SortKeySpecList(decode, &dst, 0, 0);
804 nmem_transfer(nmem, decode->mem);
/* Create result set setname as a clone of rset (a ZebraSet, despite the
   name).  The new set is obtained with resultSetAdd(zh, setname, 1) and gets
   its own nmem, its own copies of the database names, a duplicate of the
   source's RSET (rset_dup) and a copy of the source's RPN query. */
810 ZebraSet resultSetClone(ZebraHandle zh, const char *setname,
816 nset = resultSetAdd(zh, setname, 1);
820 nset->nmem = nmem_create();
822 nset->num_bases = rset->num_bases;
824 nmem_malloc(nset->nmem, nset->num_bases * sizeof(*rset->basenames));
825 for (i = 0; i<rset->num_bases; i++)
826 nset->basenames[i] = nmem_strdup(nset->nmem, rset->basenames[i]);
829 nset->rset = rset_dup(rset->rset);
831 nset->rpn = copy_RPNQuery(rset->rpn, nset->nmem);
/* Sort a result set according to sort_sequence.
   Exactly one input set name is supported: zero names raises
   YAZ_BIB1_NO_RESULT_SET_NAME_SUPPLIED_ON_SORT, more than one raises
   YAZ_BIB1_SORT_TOO_MANY_INPUT_RESULTS.  A missing set, or a set without an
   rset, raises YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST.
   When output_setname differs from the input name the input set is cloned
   under the output name first.  The sort specification is copied into the
   target set's nmem (sortSpec) and the work is done by resultSetSortSingle,
   whose result is returned. */
835 ZEBRA_RES resultSetSort(ZebraHandle zh, NMEM nmem,
836 int num_input_setnames, const char **input_setnames,
837 const char *output_setname,
838 Z_SortKeySpecList *sort_sequence, int *sort_status)
843 if (num_input_setnames == 0)
845 zebra_setError(zh, YAZ_BIB1_NO_RESULT_SET_NAME_SUPPLIED_ON_SORT, 0);
848 if (num_input_setnames > 1)
850 zebra_setError(zh, YAZ_BIB1_SORT_TOO_MANY_INPUT_RESULTS, 0);
855 yaz_log(log_level_sort, "result set sort input=%s output=%s",
856 *input_setnames, output_setname);
857 sset = resultSetGet(zh, input_setnames[0]);
860 zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
864 if (!(rset = sset->rset))
866 zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
/* sorting into a different name works on a clone of the input set */
870 if (strcmp(output_setname, input_setnames[0]))
871 sset = resultSetClone(zh, output_setname, sset);
872 sset->sortSpec = copy_SortKeySpecList(sort_sequence, sset->nmem);
873 return resultSetSortSingle(zh, nmem, sset, rset, sort_sequence,
/* Sort the records of rset into sset->sort_info according to sort_sequence.

   Setup: the set's num_entries is reset to 0, the rset's terms are collected
   (for the closing log output), and up to ZSET_SORT_MAX_LEVEL criteria are
   prepared.  For each criterion an ord and a numerical flag are kept per
   database of the handle (zh->num_basenames), initialised to -1 and 0.
   Unsupported specifications raise a Bib-1 error: missing-value data,
   a relation other than ascending/descending, database-specific sort
   elements, non-generic sort elements, and elementSpec keys.  For sortField
   and sortAttributes keys the ord is looked up per database; the index type
   is looked up from the first database's ord only.

   Run: a compare buffer of max_entries * SORT_IDX_ENTRYSIZE bytes and a
   temporary buffer of SORT_IDX_ENTRYSIZE bytes are allocated per criterion,
   the rset is read, and on each change of sysno the record is passed to
   resultSetInsertSort.  With more than one database the database number is
   derived from the term's ord.  The break handler, if installed, is polled
   whenever the low 8 bits of sset->hits are zero.

   Finish: the buffers are freed, statistics are logged and *sort_status is
   set to Z_SortResponse_success. */
877 ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem,
878 ZebraSet sset, RSET rset,
879 Z_SortKeySpecList *sort_sequence,
888 struct sortKeyInfo sort_criteria[ZSET_SORT_MAX_LEVEL];
889 char *cmp_buf[ZSET_SORT_MAX_LEVEL];
890 char *tmp_cmp_buf[ZSET_SORT_MAX_LEVEL];
896 size_t sysno_mem_index = 0;
898 int numbases = zh->num_basenames;
899 yaz_log(log_level_sort, "searching %d databases",numbases);
901 if (zh->m_staticrank)
904 assert(nmem); /* compiler shut up about unused param */
905 sset->sort_info->num_entries = 0;
/* first call asks for the number of terms, second call fetches them */
907 rset_getterms(rset, 0, 0, &n);
908 terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
909 rset_getterms(rset, terms, n, &numTerms);
912 num_criteria = sort_sequence->num_specs;
913 if (num_criteria > ZSET_SORT_MAX_LEVEL)
914 num_criteria = ZSET_SORT_MAX_LEVEL;
915 /* set up the search criteria */
916 for (i = 0; i < num_criteria; i++)
918 Z_SortKeySpec *sks = sort_sequence->specs[i];
922 sort_criteria[i].ord = (int *)
923 nmem_malloc(nmem, sizeof(int)*numbases);
924 sort_criteria[i].numerical = (int *)
925 nmem_malloc(nmem, sizeof(int)*numbases);
927 /* initialize ord and numerical for each database */
928 for (ib = 0; ib < numbases; ib++)
930 sort_criteria[i].ord[ib] = -1;
931 sort_criteria[i].numerical[ib] = 0;
934 if (sks->which == Z_SortKeySpec_missingValueData)
936 zebra_setError(zh, YAZ_BIB1_UNSUPP_MISSING_DATA_ACTION, 0);
939 if (*sks->sortRelation == Z_SortKeySpec_ascending)
940 sort_criteria[i].relation = 'A';
941 else if (*sks->sortRelation == Z_SortKeySpec_descending)
942 sort_criteria[i].relation = 'D';
945 zebra_setError(zh, YAZ_BIB1_ILLEGAL_SORT_RELATION, 0);
948 if (sks->sortElement->which == Z_SortElement_databaseSpecific)
950 zebra_setError(zh, YAZ_BIB1_DATABASE_SPECIFIC_SORT_UNSUPP, 0);
953 else if (sks->sortElement->which != Z_SortElement_generic)
955 zebra_setError(zh, YAZ_BIB1_SORT_ILLEGAL_SORT, 0);
958 sk = sks->sortElement->u.generic;
961 case Z_SortKey_sortField:
962 yaz_log(log_level_sort, "key %d is of type sortField", i+1);
963 for (ib = 0; ib < numbases; ib++)
965 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
966 sort_criteria[i].numerical[ib] = 0;
967 sort_criteria[i].ord[ib] =
968 zebraExplain_lookup_attr_str(zh->reg->zei,
969 zinfo_index_category_sort,
971 if (sks->which != Z_SortKeySpec_null
972 && sort_criteria[i].ord[ib] == -1)
975 YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
980 case Z_SortKey_elementSpec:
981 yaz_log(log_level_sort, "key %d is of type elementSpec", i+1);
982 zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
984 case Z_SortKey_sortAttributes:
985 yaz_log(log_level_sort, "key %d is of type sortAttributes", i+1);
986 /* for every database we searched, get the sort index file
987 id (ord) and its numerical indication and store them in
989 for (ib = 0; ib < numbases; ib++)
991 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
992 res = zebra_sort_get_ord(zh, sk->u.sortAttributes,
993 &sort_criteria[i].ord[ib],
994 &sort_criteria[i].numerical[ib]);
997 if (sks->which != Z_SortKeySpec_null && res != ZEBRA_OK)
1001 /* right now we look up the index type based on the first database
1002 if the index_type's can differ between the indexes of different
1003 databases (which i guess they can?) then we have to store the
1004 index types for each database, just like the ord and numerical */
1005 if (zebraExplain_lookup_ord(zh->reg->zei, sort_criteria[i].ord[0],
1006 &sort_criteria[i].index_type,
1009 zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
1013 /* allocate space for each compare buf + one extra for tmp comparison */
1014 /* cmp_buf is an array of array, the first dimension is the criteria and the second dimension are
1015 all other result entries to compare against. This is slowly filled when records are processed.
1016 tmp_cmp_buf is an array with a value of the current record for each criteria
1018 for (i = 0; i<num_criteria; i++)
1020 cmp_buf[i] = xmalloc(sset->sort_info->max_entries
1021 * SORT_IDX_ENTRYSIZE);
1022 tmp_cmp_buf[i] = xmalloc(SORT_IDX_ENTRYSIZE);
1024 rfd = rset_open(rset, RSETF_READ);
1025 while (rset_read(rfd, &key, &termid))
1027 zint this_sys = key.mem[sysno_mem_index];
1028 if (log_level_searchhits)
1029 key_logdump_txt(log_level_searchhits, &key, termid->name);
1031 if (this_sys != psysno)
1033 int database_no = 0;
1034 if ((sset->hits & 255) == 0 && zh->break_handler_func)
1036 if (zh->break_handler_func(zh->break_handler_data))
1038 rset_set_hits_limit(rset, 0);
1045 /* determine database from the term, but only bother if more than
1046 one database is in use*/
1047 if (numbases > 1 && termid->ol)
1049 const char *this_db = 0;
1050 if (zebraExplain_lookup_ord(zh->reg->zei, termid->ol->ord, 0, &this_db, 0)
1053 for (ib = 0; ib < numbases; ib++)
1054 if (!strcmp(this_db, zh->basenames[ib]))
1059 yaz_log(YLOG_LOG, "sysno=" ZINT_FORMAT " database_no=%d", this_sys,
1061 ord_list_print(termid->ol);
1063 resultSetInsertSort(zh, sset, database_no,
1064 sort_criteria, num_criteria, psysno, cmp_buf,
1070 /* free the compare buffers */
1071 for (i = 0; i<num_criteria; i++)
1074 xfree(tmp_cmp_buf[i]);
1077 yaz_log(log_level_sort, ZINT_FORMAT " keys, " ZINT_FORMAT " sysnos, sort",
1079 for (i = 0; i < numTerms; i++)
1080 yaz_log(log_level_sort, "term=\"%s\" type=%s count=" ZINT_FORMAT,
1081 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
1082 *sort_status = Z_SortResponse_success;
/* Look up result set resultSetId via resultSetGet (which may re-run the
   search) and give access to its RSET. */
1086 RSET resultSetRef(ZebraHandle zh, const char *resultSetId)
1090 if ((s = resultSetGet(zh, resultSetId)))
/* Rank the records of rset into zebraSet->sort_info.

   The rank handler named by the "rank" resource (default "rank-1") is looked
   up; if none exists a warning is logged and YAZ_BIB1_UNSUPP_SEARCH is
   raised.  Otherwise the rset is read key by key: every key is fed to the
   handler's add callback with the key's last component as sequence number,
   and whenever the sysno changes the previous record's score is computed with
   the calc callback and inserted with resultSetInsertRank (relation 'A').
   The final record is scored after the loop and the handler's end callback
   is invoked.

   With static ranking (zh->m_staticrank) the sysno is taken from key.mem[1]
   and key.mem[0] is passed to calc as the static rank.

   The break handler, if installed, is polled when the low 8 bits of
   rfd->counted_items are zero.  Once counted_items exceeds rset->hits_limit
   the set's estimated_hit_count flag is raised and the rset's hits limit is
   cleared.  On completion zebraSet->hits is taken from rset->hits_count and
   per-term statistics are logged. */
1095 ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet,
1096 RSET rset, NMEM nmem)
1105 ZebraRankClass rank_class;
1106 struct zset_sort_info *sort_info;
1107 const char *rank_handler_name = res_get_def(zh->res, "rank", "rank-1");
1108 size_t sysno_mem_index = 0;
1110 if (zh->m_staticrank)
1111 sysno_mem_index = 1;
1115 sort_info = zebraSet->sort_info;
1116 sort_info->num_entries = 0;
1118 zebraSet->estimated_hit_count = 0;
/* first call asks for the number of terms, second call fetches them */
1119 rset_getterms(rset, 0, 0, &n);
1120 terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
1121 rset_getterms(rset, terms, n, &numTerms);
1123 rank_class = zebraRankLookup(zh, rank_handler_name);
1126 yaz_log(YLOG_WARN, "No such rank handler: %s", rank_handler_name);
1127 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "Cannot find rank handler");
1132 RSFD rfd = rset_open(rset, RSETF_READ);
1133 struct rank_control *rc = rank_class->control;
1136 void *handle = (*rc->begin) (zh->reg, rank_class->class_handle, rset,
1137 nmem, terms, numTerms);
1138 zint psysno = 0; /* previous doc id / sys no */
1139 zint pstaticrank = 0; /* previous static rank */
1141 while (rset_read(rfd, &key, &termid))
1143 zint this_sys = key.mem[sysno_mem_index];
1145 zint seqno = key.mem[key.len-1];
1147 if (log_level_searchhits)
1148 key_logdump_txt(log_level_searchhits, &key, termid->name);
1149 if (this_sys != psysno)
1150 { /* new record .. */
1151 if (!(rfd->counted_items & 255) && zh->break_handler_func)
1153 if (zh->break_handler_func(zh->break_handler_data))
1155 yaz_log(YLOG_LOG, "Aborted search");
1159 if (rfd->counted_items > rset->hits_limit)
1162 { /* only if we did have a previous record */
1163 score = (*rc->calc)(handle, psysno, pstaticrank,
1165 /* insert the hit. A=Ascending */
1166 resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1171 zebraSet->estimated_hit_count = 1;
1172 rset_set_hits_limit(rset, 0);
1176 if (zh->m_staticrank)
1177 pstaticrank = key.mem[0];
1179 (*rc->add)(handle, CAST_ZINT_TO_INT(seqno), termid);
1183 { /* we had - at least - one record */
1184 score = (*rc->calc)(handle, psysno, pstaticrank, &stop_flag);
1185 /* insert the hit. A=Ascending */
1186 resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1189 (*rc->end)(zh->reg, handle);
1192 zebraSet->hits = rset->hits_count;
1194 yaz_log(log_level_searchterms, ZINT_FORMAT " keys, "
1195 ZINT_FORMAT " sysnos, rank", kno, zebraSet->hits);
1196 for (i = 0; i < numTerms; i++)
1198 yaz_log(log_level_searchterms, "term=\"%s\" type=%s count="
1200 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
/* Find the rank class whose control name equals name in the register's
   rank_classes list.  A class found with init_flag still clear is
   initialised on this first use: if its control block provides a create
   callback, that is called and its result stored as class_handle. */
1205 ZebraRankClass zebraRankLookup(ZebraHandle zh, const char *name)
1207 ZebraRankClass p = zh->reg->rank_classes;
1208 while (p && strcmp(p->control->name, name))
1210 if (p && !p->init_flag)
1212 if (p->control->create)
1213 p->class_handle = (*p->control->create)(zh);
/* Register a rank handler with reg.  A new class record is allocated holding
   its own copy of *ctrl (including a private copy of the name) and is pushed
   onto the front of reg->rank_classes. */
1219 void zebraRankInstall(struct zebra_register *reg, struct rank_control *ctrl)
1221 ZebraRankClass p = (ZebraRankClass) xmalloc(sizeof(*p));
1222 p->control = (struct rank_control *) xmalloc(sizeof(*p->control));
1223 memcpy(p->control, ctrl, sizeof(*p->control));
/* the memcpy above copied only the pointer; give the class its own string */
1224 p->control->name = xstrdup(ctrl->name);
1226 p->next = reg->rank_classes;
1227 reg->rank_classes = p;
/* Tear down all rank classes of reg.  For each class the destroy callback is
   invoked if the class was initialised and provides one, the class's copy of
   the name is freed, and finally the list head is reset to NULL. */
1230 void zebraRankDestroy(struct zebra_register *reg)
1232 ZebraRankClass p = reg->rank_classes;
/* fetch the successor before the current node is released */
1235 ZebraRankClass p_next = p->next;
1236 if (p->init_flag && p->control->destroy)
1237 (*p->control->destroy)(reg, p->class_handle);
1238 xfree(p->control->name);
1243 reg->rank_classes = NULL;
/* Recursively walk rset and its children, counting terms.
   Each of termid_array, hits_array and approx_array may be null; when given,
   the slot at the running index receives the rset's term, hits_count and
   hits_approx respectively.  Children are passed the arrays offset by the
   number of terms counted so far, so slots are filled in traversal order.
   Calling with all three arrays null therefore just yields the term count
   (as zebra_result_set_term_no and zebra_result_set_term_info do). */
1246 static int trav_rset_for_termids(RSET rset, TERMID *termid_array,
1247 zint *hits_array, int *approx_array)
1251 for (i = 0; i<rset->no_children; i++)
1252 no += trav_rset_for_termids(rset->children[i],
1253 (termid_array ? termid_array + no : 0),
1254 (hits_array ? hits_array + no : 0),
1255 (approx_array ? approx_array + no : 0));
1259 termid_array[no] = rset->term;
1261 hits_array[no] = rset->hits_count;
1263 approx_array[no] = rset->hits_approx;
1265 yaz_log(YLOG_LOG, "rset=%p term=%s limit=" ZINT_FORMAT
1266 " count=" ZINT_FORMAT,
1267 rset, rset->term->name, rset->hits_limit, rset->hits_count);
/* Report the number of terms in result set setname: *num_terms receives the
   count obtained by traversing the set's rset. */
1274 ZEBRA_RES zebra_result_set_term_no(ZebraHandle zh, const char *setname,
1277 ZebraSet sset = resultSetGet(zh, setname);
1281 *num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
/* Report information about term number no (0-based) of result set setname.
   When no is within range, temporary arrays are filled by traversing the
   set's rset and the following outputs are produced:
     *count       hit count of the term
     *approx      approximation flag of the term
     termbuf      the term's name, converted with zh->iconv_from_utf8 when
                  that converter is set, otherwise copied; NUL-terminated
     *termlen     in: capacity of termbuf; out: number of bytes stored
     *term_ref_id the term's ref_id
   NOTE(review): outleft is computed as *termlen - 1 on a size_t, which wraps
   if *termlen is 0 -- confirm callers always pass a non-zero capacity. */
1287 ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname,
1288 int no, zint *count, int *approx,
1289 char *termbuf, size_t *termlen,
1290 const char **term_ref_id)
1292 ZebraSet sset = resultSetGet(zh, setname);
1295 int num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1296 if (no >= 0 && no < num_terms)
1298 TERMID *term_array = xmalloc(num_terms * sizeof(*term_array));
1299 zint *hits_array = xmalloc(num_terms * sizeof(*hits_array));
1300 int *approx_array = xmalloc(num_terms * sizeof(*approx_array));
1302 trav_rset_for_termids(sset->rset, term_array,
1303 hits_array, approx_array);
1306 *count = hits_array[no];
1308 *approx = approx_array[no];
1311 char *inbuf = term_array[no]->name;
1312 size_t inleft = strlen(inbuf);
/* one byte is reserved for the terminating NUL */
1313 size_t outleft = *termlen - 1;
1315 if (zh->iconv_from_utf8 != 0)
1317 char *outbuf = termbuf;
1320 ret = yaz_iconv(zh->iconv_from_utf8, &inbuf, &inleft,
1322 if (ret == (size_t)(-1))
1326 yaz_iconv(zh->iconv_from_utf8, 0, 0,
1328 *termlen = outbuf - termbuf;
1333 if (inleft > outleft)
1336 memcpy(termbuf, inbuf, *termlen);
1338 termbuf[*termlen] = '\0';
1341 *term_ref_id = term_array[no]->ref_id;
1345 xfree(approx_array);
/* Collect the hits of result set setname that belong to record sysno and
   append them to snippets.
   A temporary rset is created (in the "setTmpDir" directory resource) and a
   key is written to it; it is then AND-combined with a duplicate of the
   set's rset.  Every key read from the combination is appended to snippets
   once per entry of the term's ord list, using the key's last component as
   the sequence number together with the ord and the term name.  The combined
   rset is deleted afterwards. */
1352 ZEBRA_RES zebra_snippets_hit_vector(ZebraHandle zh, const char *setname,
1353 zint sysno, zebra_snippets *snippets)
1355 ZebraSet sset = resultSetGet(zh, setname);
1356 yaz_log(YLOG_DEBUG, "zebra_get_hit_vector setname=%s zysno=" ZINT_FORMAT,
1362 struct rset_key_control *kc = zebra_key_control_create(zh);
1363 NMEM nmem = nmem_create();
1365 RSET rsets[2], rset_comb;
1366 RSET rset_temp = rset_create_temp(nmem, kc, kc->scope,
1367 res_get(zh->res, "setTmpDir"),0 );
1370 RSFD rsfd = rset_open(rset_temp, RSETF_WRITE);
1377 rset_write(rsfd, &key);
1380 rsets[0] = rset_temp;
1381 rsets[1] = rset_dup(sset->rset);
1383 rset_comb = rset_create_and(nmem, kc, kc->scope, 2, rsets);
1385 rsfd = rset_open(rset_comb, RSETF_READ);
1387 while (rset_read(rsfd, &key, &termid))
1391 struct ord_list *ol;
1392 for (ol = termid->ol; ol; ol = ol->next)
1394 zebra_snippets_append(snippets, key.mem[key.len-1], 0,
1395 ol->ord, termid->name);
1401 rset_delete(rset_comb);
/* Map record id recid to system numbers across the given databases.
   On entry *no_sysnos is the capacity of sysnos; on return it is the number
   of values produced (sysnos_offset).

   A shortcut is taken when the register has no isamb or segment indexing is
   off.  NOTE(review): presumably recid itself is stored as the only sysno in
   that case -- confirm against the full body.

   Otherwise, for each database that can be selected, the "_ALLRECORDS"
   always-matches index of type "w" is looked up, its encoded ord is used as
   the dictionary key, and the ISAM position stored with it is opened.  The
   postings are forwarded to recid, and for every key whose first component
   equals recid the key's last component is stored in sysnos while capacity
   remains. */
1408 static ZEBRA_RES zebra_recid_to_sysno(ZebraHandle zh,
1409 const char **basenames, int num_bases,
1411 zint *sysnos, int *no_sysnos)
1413 ZEBRA_RES res = ZEBRA_OK;
1414 int sysnos_offset = 0;
1417 if (!zh->reg->isamb || !zh->m_segment_indexing)
1419 if (sysnos_offset < *no_sysnos)
1425 for (i = 0; res == ZEBRA_OK && i < num_bases; i++)
1427 const char *database = basenames[i];
1428 if (zebraExplain_curDatabase(zh->reg->zei, database) == 0)
1430 const char *index_type = "w";
1431 const char *use_string = "_ALLRECORDS";
1433 zinfo_index_category_t cat = zinfo_index_category_alwaysmatches;
1434 ord = zebraExplain_lookup_attr_str(zh->reg->zei, cat,
1435 index_type, use_string);
1439 int ord_len = key_SU_encode(ord, ord_buf);
/* the dictionary key is the encoded ord, NUL-terminated */
1442 ord_buf[ord_len] = '\0';
1444 info = dict_lookup(zh->reg->dict, ord_buf);
/* first byte of the dictionary info is the payload length */
1447 if (*info != sizeof(ISAM_P))
1455 struct it_key key_until, key_found;
1459 memcpy(&isam_p, info+1, sizeof(ISAM_P));
1461 pt = isamb_pp_open(zh->reg->isamb, isam_p, 2);
/* build the key to forward to: recid followed by zero components */
1466 key_until.mem[i++] = recid;
1467 key_until.mem[i++] = 0; /* section_id */
1468 if (zh->m_segment_indexing)
1469 key_until.mem[i++] = 0; /* segment */
1470 key_until.mem[i++] = 0;
1473 r = isamb_pp_forward(pt, &key_found, &key_until);
1474 while (r && key_found.mem[0] == recid)
1476 if (sysnos_offset < *no_sysnos)
1477 sysnos[sysnos_offset++] =
1478 key_found.mem[key_found.len-1];
1479 r = isamb_pp_read(pt, &key_found);
1489 *no_sysnos = sysnos_offset;
/* Map recid to system numbers using the databases of result set setname.
   The set's database names are fetched with resultSetGetBaseNames; if that
   does not return ZEBRA_OK the lookup is not performed.  Otherwise the result
   of zebra_recid_to_sysno is returned (sysnos / *no_sysnos as described
   there). */
1493 ZEBRA_RES zebra_result_recid_to_sysno(ZebraHandle zh,
1494 const char *setname,
1496 zint *sysnos, int *no_sysnos)
1498 const char **basenames;
1502 res = resultSetGetBaseNames(zh, setname, &basenames, &num_bases);
1503 if (res != ZEBRA_OK)
1506 return zebra_recid_to_sysno(zh, basenames, num_bases,
1507 recid, sysnos, no_sysnos);
/* Count the records in rset.
   The rset's hits_limit is set to approx_limit, the rset is read without
   term information, and reading is cut short once rfd->counted_items reaches
   the limit.  *count receives rset->hits_count.
   NOTE(review): record changes are detected on key.mem[0] here, whereas
   resultSetRank uses key.mem[1] when static ranking is enabled -- confirm
   this difference is intended. */
1510 void zebra_count_set(ZebraHandle zh, RSET rset, zint *count,
1517 yaz_log(YLOG_DEBUG, "count_set");
1519 rset->hits_limit = approx_limit;
1522 rfd = rset_open(rset, RSETF_READ);
1523 while (rset_read(rfd, &key,0 /* never mind terms */))
1525 if (key.mem[0] != psysno)
1527 psysno = key.mem[0];
1528 if (rfd->counted_items >= rset->hits_limit)
1533 *count = rset->hits_count;
1540 * indent-tabs-mode: nil
1542 * vim: shiftwidth=4 tabstop=8 expandtab