1 /* This file is part of the Zebra server.
2 Copyright (C) 1995-2008 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
31 #include <yaz/diagbib1.h>
/* Upper bound on the number of sort criteria: resultSetSortSingle() sizes its
   per-criterion arrays with it and clamps num_criteria to it. */
34 #define ZSET_SORT_MAX_LEVEL 10
/* Term-entry and result-set record declarations.
   NOTE(review): this excerpt shows only part of the declarations; the members
   below are the ones reached through ZebraSet pointers elsewhere in this file
   (e.g. s->rset_nmem, s->cache_rfd) - confirm against the complete source. */
36 struct zebra_set_term_entry {
47 NMEM rset_nmem; /* for creating the rsets in */
50 const char **basenames; /* database names; resultSetAddRPN() stores nmem_strdup() copies */
52 Z_SortKeySpecList *sortSpec; /* copy of the sort spec; resultSetGet() re-sorts with it */
53 struct zset_sort_info *sort_info; /* sort table, allocated in resultSetAdd() */
54 struct zebra_set_term_entry *term_entries; /* term table filled by resultSetAddTerm() */
56 struct zebra_set *next; /* next set in the zh->sets list */
58 int estimated_hit_count; /* set to 1 in resultSetRank(); passed on by resultSetAddRPN() */
60 zint cache_position; /* last position */
61 RSFD cache_rfd; /* rfd (NULL if not existing) */
62 zint cache_psysno; /* sysno for last position */
63 zint approx_limit; /* limit before we do approx */
/* Sort table types. resultSetAdd() allocates all_entries as one array and
   points entries[i] at all_entries + i; the insert routines then reorder the
   pointers in entries[] rather than the records themselves.
   NOTE(review): the remaining members are not visible in this excerpt. */
66 struct zset_sort_entry {
71 struct zset_sort_info {
74 struct zset_sort_entry *all_entries;
75 struct zset_sort_entry **entries;
/* Log level masks passed to yaz_log() in this file. The sort, searchhits,
   searchterms and resultsets masks are looked up by loglevels().
   NOTE(review): log_level_set presumably records that the lookup was done;
   its use is not visible in this excerpt. */
78 static int log_level_set=0;
79 static int log_level_sort=0;
80 static int log_level_searchhits=0;
81 static int log_level_searchterms=0;
82 static int log_level_resultsets=0;
/* Look up the module log levels "sorting", "searchhits", "searchterms" and
   "resultsets" with yaz_log_module_level() and cache them in the static
   log_level_* variables. */
84 static void loglevels(void)
88 log_level_sort = yaz_log_module_level("sorting");
89 log_level_searchhits = yaz_log_module_level("searchhits");
90 log_level_searchterms = yaz_log_module_level("searchterms");
91 log_level_resultsets = yaz_log_module_level("resultsets");
/* Run the RPN query for result set sset and build its sort table.
   Visible steps: allocate a 10-slot, zero-filled sort spec list in nmem;
   store the approximation limit in sset->approx_limit; call
   rpn_search_top() with the set's databases; count the leading non-NULL
   specs to obtain num_specs; copy the approximation limit to the rset; then
   call resultSetRank() or resultSetSortSingle().
   The literal 10 equals the value of ZSET_SORT_MAX_LEVEL.
   NOTE(review): the condition selecting rank versus sort, and the return
   statement, are not visible in this excerpt. */
96 static ZEBRA_RES resultSetSearch(ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
97 Z_RPNQuery *rpn, ZebraSet sset)
100 Z_SortKeySpecList *sort_sequence;
102 ZEBRA_RES res = ZEBRA_OK;
104 sort_sequence = (Z_SortKeySpecList *)
105 nmem_malloc(nmem, sizeof(*sort_sequence));
106 sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
107 sort_sequence->specs = (Z_SortKeySpec **)
108 nmem_malloc(nmem, sort_sequence->num_specs *
109 sizeof(*sort_sequence->specs));
110 for (i = 0; i<sort_sequence->num_specs; i++)
111 sort_sequence->specs[i] = 0;
113 rpn_get_top_approx_limit(zh, rpn->RPNStructure, &sset->approx_limit);
115 res = rpn_search_top(zh, rpn->RPNStructure, rpn->attributeSetId,
118 sset->num_bases, sset->basenames,
/* NOTE(review): this loop stops only at a NULL slot and has no upper bound;
   confirm rpn_search_top() can never fill all 10 slots. */
125 for (i = 0; sort_sequence->specs[i]; i++)
127 sort_sequence->num_specs = i;
128 rset->hits_limit = sset->approx_limit;
131 res = resultSetRank(zh, sset, rset, rset_nmem);
135 res = resultSetSortSingle(zh, nmem, sset, rset,
136 sort_sequence, &sort_status);
/* Create or replace the result set for a search and execute the RPN query.
   - *estimated_hit_count is reset to 0 up front;
   - the set is obtained with resultSetAdd(zh, setname, 1) and marked locked
     while the search runs (locked = 1 ... locked = 0);
   - the set gets a new rset_nmem and nmem_strdup() copies of the num_bases
     database names, allocated from the set's own nmem;
   - resultSetSearch() is run, the hit count is reported through *hits and
     *estimated_hit_count becomes 1 when zebraSet->estimated_hit_count is
     non-zero.
   NOTE(review): error paths and the return statement are not visible in
   this excerpt. */
143 ZEBRA_RES resultSetAddRPN(ZebraHandle zh, NMEM m, Z_RPNQuery *rpn,
144 int num_bases, char **basenames,
146 zint *hits, int *estimated_hit_count)
153 *estimated_hit_count = 0;
155 zebraSet = resultSetAdd(zh, setname, 1);
158 zebraSet->locked = 1;
161 zebraSet->rset_nmem = nmem_create();
163 zebraSet->num_bases = num_bases;
164 zebraSet->basenames =
165 nmem_malloc(zebraSet->nmem, num_bases * sizeof(*zebraSet->basenames));
166 for (i = 0; i<num_bases; i++)
167 zebraSet->basenames[i] = nmem_strdup(zebraSet->nmem, basenames[i]);
169 res = resultSetSearch(zh, zebraSet->nmem, zebraSet->rset_nmem,
171 *hits = zebraSet->hits;
172 if (zebraSet->estimated_hit_count)
173 *estimated_hit_count = 1;
177 zebraSet->locked = 0;
/* Record one search term in set s.
   When s->term_entries is NULL a table of 1000 entries is allocated from
   s->nmem and every .term is cleared. While s->hits is below
   term_entries_max, slot s->hits receives reg_type plus nmem_strdup() copies
   of db, index_name and term.
   NOTE(review): whether nmem_create() is guarded and whether s->hits is
   advanced here cannot be seen in this excerpt - confirm in the full source. */
183 void resultSetAddTerm(ZebraHandle zh, ZebraSet s, int reg_type,
184 const char *db, const char *index_name,
187 assert(zh); /* compiler shut up */
189 s->nmem = nmem_create();
190 if (!s->term_entries)
193 s->term_entries_max = 1000;
195 nmem_malloc(s->nmem, s->term_entries_max *
196 sizeof(*s->term_entries));
197 for (i = 0; i < s->term_entries_max; i++)
198 s->term_entries[i].term = 0;
200 if (s->hits < s->term_entries_max)
202 s->term_entries[s->hits].reg_type = reg_type;
203 s->term_entries[s->hits].db = nmem_strdup(s->nmem, db);
204 s->term_entries[s->hits].index_name = nmem_strdup(s->nmem, index_name);
205 s->term_entries[s->hits].term = nmem_strdup(s->nmem, term);
/* Find or create the result set called name on zh->sets.
   Existing set: logs "updating result set", tests (!ov || s->locked), and
   releases the cached RSFD, the rset and both NMEM pools.
   New set: allocates the ZebraSet, duplicates the name and builds a sort
   table whose capacity comes from the "sortmax" resource (default "1000",
   raised to at least 2); entries[i] is pointed at all_entries + i.
   The visible tail resets cache_position, approx_limit (from zh) and
   estimated_hit_count.
   NOTE(review): the outcome of the (!ov || s->locked) test and the return
   statement are not visible here. sortmax is parsed with atoi(), so a
   malformed value is not detected. */
210 ZebraSet resultSetAdd(ZebraHandle zh, const char *name, int ov)
215 for (s = zh->sets; s; s = s->next)
216 if (!strcmp(s->name, name))
223 yaz_log(log_level_resultsets, "updating result set %s", name);
224 if (!ov || s->locked)
229 rset_close(s->cache_rfd);
230 rset_delete(s->rset);
233 nmem_destroy(s->rset_nmem);
235 nmem_destroy(s->nmem);
239 const char *sort_max_str = zebra_get_resource(zh, "sortmax", "1000");
241 yaz_log(log_level_resultsets, "adding result set %s", name);
242 s = (ZebraSet) xmalloc(sizeof(*s));
245 s->name = xstrdup(name);
247 s->sort_info = (struct zset_sort_info *)
248 xmalloc(sizeof(*s->sort_info));
249 s->sort_info->max_entries = atoi(sort_max_str);
250 if (s->sort_info->max_entries < 2)
251 s->sort_info->max_entries = 2;
253 s->sort_info->entries = (struct zset_sort_entry **)
254 xmalloc(sizeof(*s->sort_info->entries) *
255 s->sort_info->max_entries);
256 s->sort_info->all_entries = (struct zset_sort_entry *)
257 xmalloc(sizeof(*s->sort_info->all_entries) *
258 s->sort_info->max_entries);
259 for (i = 0; i < s->sort_info->max_entries; i++)
260 s->sort_info->entries[i] = s->sort_info->all_entries + i;
270 s->cache_position = 0;
272 s->approx_limit = zh->approx_limit;
273 s->estimated_hit_count = 0;
/* Look up the result set called name on zh->sets.
   A set that has no term entries and no rset but still has its RPN query is
   searched again (logged as "research"): a scratch NMEM and a new rset_nmem
   are created and resultSetSearch() is re-run; if that yields an rset and a
   sort spec is stored, the set is re-sorted with resultSetSortSingle().
   NOTE(review): the return statements and the release of the scratch NMEM
   are not visible in this excerpt. */
277 ZebraSet resultSetGet(ZebraHandle zh, const char *name)
281 for (s = zh->sets; s; s = s->next)
282 if (!strcmp(s->name, name))
284 if (!s->term_entries && !s->rset && s->rpn)
286 NMEM nmem = nmem_create();
287 yaz_log(log_level_resultsets, "research %s", name);
289 s->rset_nmem = nmem_create();
290 resultSetSearch(zh, nmem, s->rset_nmem, s->rpn, s);
291 if (s->rset && s->sortSpec)
294 yaz_log(log_level_resultsets, "resort %s", name);
295 resultSetSortSingle(zh, nmem, s, s->rset, s->sortSpec,
/* Report the database names of result set setname through *basenames and
   *num_bases. The set is fetched with resultSetGet().
   NOTE(review): handling of a missing set and the return values are not
   visible in this excerpt. */
305 ZEBRA_RES resultSetGetBaseNames(ZebraHandle zh, const char *setname,
306 const char ***basenames, int *num_bases)
308 ZebraSet sset = resultSetGet(zh, setname);
311 *basenames = sset->basenames;
312 *num_bases = sset->num_bases;
/* Drop the computed data of every result set on zh->sets: the cached RSFD is
   closed, the rset deleted, cache_position reset to 0 and rset_nmem
   destroyed. Logs "invalidating result sets" first.
   NOTE(review): the guards around the individual steps are not visible in
   this excerpt. */
317 void resultSetInvalidate(ZebraHandle zh)
319 ZebraSet s = zh->sets;
321 yaz_log(log_level_resultsets, "invalidating result sets");
322 for (; s; s = s->next)
327 rset_close(s->cache_rfd);
328 rset_delete(s->rset);
332 s->cache_position = 0;
334 nmem_destroy(s->rset_nmem);
/* Delete result sets by name.
   statuses[i] starts as Z_DeleteStatus_resultSetDidNotExist for each of the
   num names and becomes Z_DeleteStatus_success when a set named names[i] is
   found. For a deleted set the visible code frees the sort table arrays and
   releases nmem, the cached RSFD, the rset and rset_nmem.
   NOTE(review): the walk over zh->sets (through ss), the unlinking of a
   deleted set and any special meaning of num are not visible in this
   excerpt. */
339 void resultSetDestroy(ZebraHandle zh, int num, char **names,int *statuses)
341 ZebraSet * ss = &zh->sets;
345 for (i = 0; i<num; i++)
346 statuses[i] = Z_DeleteStatus_resultSetDidNotExist;
353 for (i = 0; i<num; i++)
354 if (!strcmp(s->name, names[i]))
357 statuses[i] = Z_DeleteStatus_success;
366 xfree(s->sort_info->all_entries);
367 xfree(s->sort_info->entries);
371 nmem_destroy(s->nmem);
375 rset_close(s->cache_rfd);
376 rset_delete(s->rset);
379 nmem_destroy(s->rset_nmem);
/* Build meta records for num positions of result set name.
   A position array is prepared (pos starts out as pos_small and can be
   replaced by an xmalloc()ed array of num entries), filled in a loop and
   handed to zebra_meta_records_create(). Values of num outside 1..10000 take
   a separate branch.
   NOTE(review): the remaining parameters, the body of that branch, the fill
   expression and the cleanup of pos are not visible in this excerpt. */
388 ZebraMetaRecord *zebra_meta_records_create_range(ZebraHandle zh,
393 zint *pos = pos_small;
397 if (num > 10000 || num <= 0)
401 pos = xmalloc(sizeof(*pos) * num);
403 for (i = 0; i<num; i++)
406 mr = zebra_meta_records_create(zh, name, num, pos);
/* Resolve num result-set positions (1-based) of set name to meta records.
   The result is an xmalloc()ed array sr of num ZebraMetaRecord entries.
   - Set without an rset: records are filled from the term table
     (term and db of term_entries[positions[i]-1]).
   - Set with an rset: positions inside 1..sort_info->num_entries take sysno
     and score from the sort table. If positions remain, the rset is read
     sequentially, skipping sysnos already present in the sort table, and
     each hit is stored with score -1. Reading resumes from the cached RSFD
     when the next wanted position lies beyond cache_position; the cache
     fields are updated at the end.
   NOTE(review): return statements, the per-record initialisation and the
   value assigned under zh->m_staticrank are not visible in this excerpt. */
413 ZebraMetaRecord *zebra_meta_records_create(ZebraHandle zh, const char *name,
414 int num, zint *positions)
417 ZebraMetaRecord *sr = 0;
420 struct zset_sort_info *sort_info;
421 size_t sysno_mem_index = 0;
423 if (zh->m_staticrank)
428 if (!(sset = resultSetGet(zh, name)))
430 if (!(rset = sset->rset))
432 if (!sset->term_entries)
434 sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
435 for (i = 0; i<num; i++)
/* NOTE(review): only the upper bound is tested here; a position below 1
   would index term_entries[-1] or lower - confirm callers never pass one. */
442 if (positions[i] <= sset->term_entries_max)
444 sr[i].term = sset->term_entries[positions[i]-1].term;
445 sr[i].db = sset->term_entries[positions[i]-1].db;
451 sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
452 for (i = 0; i<num; i++)
459 sort_info = sset->sort_info;
464 for (i = 0; i<num; i++)
466 position = positions[i];
467 if (position > 0 && position <= sort_info->num_entries)
469 yaz_log(log_level_sort, "got pos=" ZINT_FORMAT
470 " (sorted)", position);
471 sr[i].sysno = sort_info->entries[position-1]->sysno;
472 sr[i].score = sort_info->entries[position-1]->score;
476 /* did we really get all entries using sort ? */
477 for (i = 0; i<num; i++)
482 if (i < num) /* nope, get the rest, unsorted - sorry */
491 position = sort_info->num_entries;
492 while (num_i < num && positions[num_i] <= position)
495 if (sset->cache_rfd &&
496 num_i < num && positions[num_i] > sset->cache_position)
498 position = sset->cache_position;
499 rfd = sset->cache_rfd;
500 psysno = sset->cache_psysno;
505 rset_close(sset->cache_rfd);
506 rfd = rset_open(rset, RSETF_READ);
508 while (num_i < num && rset_read(rfd, &key, 0))
510 zint this_sys = key.mem[sysno_mem_index];
511 if (this_sys != psysno)
516 /* determine whether we already have this in our set */
517 for (i = sort_info->num_entries; --i >= 0; )
518 if (psysno == sort_info->entries[i]->sysno)
525 if (position == positions[num_i])
527 sr[num_i].sysno = psysno;
528 yaz_log(log_level_sort, "got pos=" ZINT_FORMAT " (unsorted)", position);
529 sr[num_i].score = -1;
534 sset->cache_position = position;
535 sset->cache_psysno = psysno;
536 sset->cache_rfd = rfd;
/* Counterpart of zebra_meta_records_create() for a records array.
   NOTE(review): only the assertion on zh is visible in this excerpt;
   presumably the array is released here - confirm in the full source. */
542 void zebra_meta_records_destroy(ZebraHandle zh, ZebraMetaRecord *records,
545 assert(zh); /* compiler shut up about unused arg */
/* Members of struct sortKeyInfo (one instance per sort criterion).
   NOTE(review): the struct header and its relation member are not visible
   in this excerpt. */
551 int *ord; /* array of ord for each database searched */
552 int *numerical; /* array of numerical flags, one for each database searched */
553 const char *index_type; /* index type, looked up from ord[0] (first database) only */
/* Insert record sysno into the sort table of sset according to criteria.
   Step 1: for each criterion the record's sort key is read from the sort
   index (skipped when ord[database_no] is -1) into tmp_cmp_buf[i], which is
   zero-filled first and receives at most SORT_IDX_ENTRYSIZE bytes.
   Step 2: starting at num_entries, the new keys are compared with the keys
   stored in cmp_buf - numerically via atof() on the untranslated terms when
   numerical[database_no] is set, otherwise with memcmp() - honouring the
   'A' or 'D' relation of each criterion.
   Step 3: entries above the insert position, and their cmp_buf slices, are
   moved up one slot; the new entry is stored with score -1 and its keys are
   copied into cmp_buf.
   NOTE(review): the loop headers, the exit taken when max_entries is reached
   and the release of the WRBUF are not visible in this excerpt. */
556 void resultSetInsertSort(ZebraHandle zh, ZebraSet sset,
558 struct sortKeyInfo *criteria, int num_criteria,
560 char *cmp_buf[], char *tmp_cmp_buf[])
562 struct zset_sort_entry *new_entry = NULL;
563 struct zset_sort_info *sort_info = sset->sort_info;
565 WRBUF w = wrbuf_alloc();
567 zebra_sort_sysno(zh->reg->sort_index, sysno);
568 for (i = 0; i<num_criteria; i++)
570 char *this_entry_buf = tmp_cmp_buf[i];
571 memset(this_entry_buf, '\0', SORT_IDX_ENTRYSIZE);
573 if (criteria[i].ord[database_no] != -1)
575 yaz_log(log_level_sort, "pre zebra_sort_type ord is %d",
576 criteria[i].ord[database_no]);
577 zebra_sort_type(zh->reg->sort_index, criteria[i].ord[database_no]);
579 zebra_sort_read(zh->reg->sort_index, w);
580 memcpy(this_entry_buf, wrbuf_buf(w),
581 (wrbuf_len(w) >= SORT_IDX_ENTRYSIZE) ?
582 SORT_IDX_ENTRYSIZE : wrbuf_len(w));
586 yaz_log(log_level_sort, "criteria[i].ord is -1 so not reading from sort index");
590 i = sort_info->num_entries;
594 for (j = 0; j<num_criteria; j++)
596 char *this_entry_buf = tmp_cmp_buf[j];
597 char *other_entry_buf =
598 cmp_buf[j] + i * SORT_IDX_ENTRYSIZE;
599 if (criteria[j].numerical[database_no])
601 char this_entry_org[1024];
602 char other_entry_org[1024];
604 /* when searching multiple databases, we use the index
605 type of the first one. So if they differ between
606 databases, we have a problem here we could store the
607 index_type for each database, but if we didn't find the
608 record in any sort index, then we still don't know to
609 which database it belongs. */
610 const char *index_type = criteria[j].index_type;
611 zebra_term_untrans(zh, index_type, this_entry_org,
613 zebra_term_untrans(zh, index_type, other_entry_org,
615 diff = atof(this_entry_org) - atof(other_entry_org);
626 rel = memcmp(this_entry_buf, other_entry_buf,
629 /* when the compare is equal, continue to next criteria,
636 if (criteria[j].relation == 'A')
641 else if (criteria[j].relation == 'D')
648 yaz_log(log_level_sort, "ok, we want to insert record at position %d",i);
649 j = sort_info->max_entries;
651 yaz_log(log_level_sort, "sort_info->max_entries reached (%d) abort sort",j);
655 if (sort_info->num_entries == j)
658 j = (sort_info->num_entries)++;
659 new_entry = sort_info->entries[j];
660 /* move up all higher entries (to make room) */
664 for (k = 0; k<num_criteria; k++)
666 char *j_buf = cmp_buf[k] + j * SORT_IDX_ENTRYSIZE;
667 char *j_1_buf = cmp_buf[k] + (j-1) * SORT_IDX_ENTRYSIZE;
668 memcpy(j_buf, j_1_buf, SORT_IDX_ENTRYSIZE);
670 sort_info->entries[j] = sort_info->entries[j-1];
673 /* and insert the new entry at the correct place */
674 sort_info->entries[i] = new_entry;
676 /* and add this to the compare buffer */
677 for (i = 0; i<num_criteria; i++)
679 char *new_entry_buf = cmp_buf[i] + j * SORT_IDX_ENTRYSIZE;
680 char *this_entry_buf = tmp_cmp_buf[i];
681 memcpy(new_entry_buf, this_entry_buf, SORT_IDX_ENTRYSIZE);
683 new_entry->sysno = sysno;
684 new_entry->score = -1;
/* Insert (sysno, score) into the score-ordered table sort_info.
   The search for the insert position starts at num_entries and compares
   score with the stored scores (rel = score - entries[i]->score); relation
   selects the ordering, 'A' being one of the tested values. Entries above
   the position are shifted up by one pointer, and the slot taken over
   receives sysno and score.
   NOTE(review): the loop headers, the other relation branch and the
   handling of a full table are not visible in this excerpt. */
687 void resultSetInsertRank(ZebraHandle zh, struct zset_sort_info *sort_info,
688 zint sysno, int score, int relation)
690 struct zset_sort_entry *new_entry = NULL;
692 assert(zh); /* compiler shut up about unused arg */
694 i = sort_info->num_entries;
699 rel = score - sort_info->entries[i]->score;
706 else if (relation == 'A')
713 j = sort_info->max_entries;
717 if (sort_info->num_entries == j)
720 j = (sort_info->num_entries)++;
722 new_entry = sort_info->entries[j];
725 sort_info->entries[j] = sort_info->entries[j-1];
728 sort_info->entries[i] = new_entry;
730 new_entry->sysno = sysno;
731 new_entry->score = score;
/* Copy an RPN query by round-tripping it through ODR: src is encoded with
   z_RPNQuery() into an ODR_ENCODE stream, the resulting buffer is decoded
   into dst, and the decoder's memory is handed over to nmem with
   nmem_transfer().
   NOTE(review): destruction of the two ODR streams and the return statement
   are not visible in this excerpt. */
734 static Z_RPNQuery *copy_RPNQuery(Z_RPNQuery *src, NMEM nmem)
737 ODR encode = odr_createmem(ODR_ENCODE);
738 ODR decode = odr_createmem(ODR_DECODE);
740 if (z_RPNQuery(encode, &src, 0, 0))
743 char *buf = odr_getbuf(encode, &len, 0);
747 odr_setbuf(decode, buf, len, 0);
748 z_RPNQuery(decode, &dst, 0, 0);
751 nmem_transfer(nmem, decode->mem);
/* Copy a sort key spec list by round-tripping it through ODR: src is encoded
   with z_SortKeySpecList(), the buffer is decoded into dst (initially 0), and
   the decoder's memory is handed over to nmem with nmem_transfer().
   NOTE(review): destruction of the two ODR streams and the return statement
   are not visible in this excerpt. */
757 static Z_SortKeySpecList *copy_SortKeySpecList(Z_SortKeySpecList *src, NMEM nmem)
759 Z_SortKeySpecList *dst = 0;
760 ODR encode = odr_createmem(ODR_ENCODE);
761 ODR decode = odr_createmem(ODR_DECODE);
763 if (z_SortKeySpecList(encode, &src, 0, 0))
766 char *buf = odr_getbuf(encode, &len, 0);
770 odr_setbuf(decode, buf, len, 0);
771 z_SortKeySpecList(decode, &dst, 0, 0);
774 nmem_transfer(nmem, decode->mem);
/* Create result set setname as a copy of rset (a ZebraSet, despite the name).
   The new set comes from resultSetAdd(zh, setname, 1) and gets its own nmem,
   nmem_strdup() copies of the database names, a duplicate of the RSET made
   with rset_dup() and a copy of the RPN query allocated in the new nmem.
   NOTE(review): failure handling and the return statement are not visible
   in this excerpt. */
780 ZebraSet resultSetClone(ZebraHandle zh, const char *setname,
786 nset = resultSetAdd(zh, setname, 1);
790 nset->nmem = nmem_create();
792 nset->num_bases = rset->num_bases;
794 nmem_malloc(nset->nmem, nset->num_bases * sizeof(*rset->basenames));
795 for (i = 0; i<rset->num_bases; i++)
796 nset->basenames[i] = nmem_strdup(nset->nmem, rset->basenames[i]);
799 nset->rset = rset_dup(rset->rset);
801 nset->rpn = copy_RPNQuery(rset->rpn, nset->nmem);
/* Sort one input result set into output_setname.
   Errors raised through zebra_setError():
   - no input set name: YAZ_BIB1_NO_RESULT_SET_NAME_SUPPLIED_ON_SORT;
   - more than one input set: YAZ_BIB1_SORT_TOO_MANY_INPUT_RESULTS;
   - set missing or without an rset:
     YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST.
   When the output name differs from the input name the set is cloned first.
   The sort spec is copied into the set's nmem (kept in sset->sortSpec) and
   the sorting itself is done by resultSetSortSingle(), whose result is
   returned.
   NOTE(review): the return values of the error branches are not visible in
   this excerpt. */
805 ZEBRA_RES resultSetSort(ZebraHandle zh, NMEM nmem,
806 int num_input_setnames, const char **input_setnames,
807 const char *output_setname,
808 Z_SortKeySpecList *sort_sequence, int *sort_status)
813 if (num_input_setnames == 0)
815 zebra_setError(zh, YAZ_BIB1_NO_RESULT_SET_NAME_SUPPLIED_ON_SORT, 0);
818 if (num_input_setnames > 1)
820 zebra_setError(zh, YAZ_BIB1_SORT_TOO_MANY_INPUT_RESULTS, 0);
825 yaz_log(log_level_sort, "result set sort input=%s output=%s",
826 *input_setnames, output_setname);
827 sset = resultSetGet(zh, input_setnames[0]);
830 zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
834 if (!(rset = sset->rset))
836 zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
840 if (strcmp(output_setname, input_setnames[0]))
841 sset = resultSetClone(zh, output_setname, sset);
842 sset->sortSpec = copy_SortKeySpecList(sort_sequence, sset->nmem);
843 return resultSetSortSingle(zh, nmem, sset, rset, sort_sequence,
/* Sort the records of rset into sset->sort_info according to sort_sequence.
   Phase 1 - criteria: at most ZSET_SORT_MAX_LEVEL specs are used. For each
   one, per-database ord and numerical arrays are allocated from nmem and
   initialised to -1 and 0, the relation becomes 'A' or 'D', and the sort
   index ord is resolved per database for sortField and sortAttributes keys.
   Unsupported input is reported through zebra_setError() (missing-value
   action, illegal relation, database-specific or illegal sort element,
   elementSpec keys, unresolvable sort index).
   Phase 2 - buffers: for every criterion cmp_buf[i] holds max_entries keys
   of SORT_IDX_ENTRYSIZE bytes and tmp_cmp_buf[i] holds one key.
   Phase 3 - scan: the rset is read; on each change of sysno the break
   handler is polled (when (sset->hits & 255) == 0), the database number is
   derived from the term's ord list when more than one database is in use,
   and resultSetInsertSort() is called.
   Phase 4 - the compare buffers are freed, term statistics are logged and
   *sort_status is set to Z_SortResponse_success.
   NOTE(review): the database list comes from zh (num_basenames, basenames),
   not from sset - confirm both agree when an older set is re-sorted. The
   return statements and the value assigned under zh->m_staticrank are not
   visible in this excerpt. */
847 ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem,
848 ZebraSet sset, RSET rset,
849 Z_SortKeySpecList *sort_sequence,
858 struct sortKeyInfo sort_criteria[ZSET_SORT_MAX_LEVEL];
859 char *cmp_buf[ZSET_SORT_MAX_LEVEL];
860 char *tmp_cmp_buf[ZSET_SORT_MAX_LEVEL];
866 size_t sysno_mem_index = 0;
868 int numbases = zh->num_basenames;
869 yaz_log(log_level_sort, "searching %d databases",numbases);
871 if (zh->m_staticrank)
874 assert(nmem); /* compiler shut up about unused param */
875 sset->sort_info->num_entries = 0;
877 rset_getterms(rset, 0, 0, &n);
878 terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
879 rset_getterms(rset, terms, n, &numTerms);
882 num_criteria = sort_sequence->num_specs;
883 if (num_criteria > ZSET_SORT_MAX_LEVEL)
884 num_criteria = ZSET_SORT_MAX_LEVEL;
885 /* set up the search criteria */
886 for (i = 0; i < num_criteria; i++)
888 Z_SortKeySpec *sks = sort_sequence->specs[i];
892 sort_criteria[i].ord = (int *)
893 nmem_malloc(nmem, sizeof(int)*numbases);
894 sort_criteria[i].numerical = (int *)
895 nmem_malloc(nmem, sizeof(int)*numbases);
897 /* initialize ord and numerical for each database */
898 for (ib = 0; ib < numbases; ib++)
900 sort_criteria[i].ord[ib] = -1;
901 sort_criteria[i].numerical[ib] = 0;
904 if (sks->which == Z_SortKeySpec_missingValueData)
906 zebra_setError(zh, YAZ_BIB1_UNSUPP_MISSING_DATA_ACTION, 0);
909 if (*sks->sortRelation == Z_SortKeySpec_ascending)
910 sort_criteria[i].relation = 'A';
911 else if (*sks->sortRelation == Z_SortKeySpec_descending)
912 sort_criteria[i].relation = 'D';
915 zebra_setError(zh, YAZ_BIB1_ILLEGAL_SORT_RELATION, 0);
918 if (sks->sortElement->which == Z_SortElement_databaseSpecific)
920 zebra_setError(zh, YAZ_BIB1_DATABASE_SPECIFIC_SORT_UNSUPP, 0);
923 else if (sks->sortElement->which != Z_SortElement_generic)
925 zebra_setError(zh, YAZ_BIB1_SORT_ILLEGAL_SORT, 0);
928 sk = sks->sortElement->u.generic;
931 case Z_SortKey_sortField:
932 yaz_log(log_level_sort, "key %d is of type sortField", i+1);
933 for (ib = 0; ib < numbases; ib++)
935 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
936 sort_criteria[i].numerical[ib] = 0;
937 sort_criteria[i].ord[ib] =
938 zebraExplain_lookup_attr_str(zh->reg->zei,
939 zinfo_index_category_sort,
941 if (sks->which != Z_SortKeySpec_null
942 && sort_criteria[i].ord[ib] == -1)
945 YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
950 case Z_SortKey_elementSpec:
951 yaz_log(log_level_sort, "key %d is of type elementSpec", i+1);
952 zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
954 case Z_SortKey_sortAttributes:
955 yaz_log(log_level_sort, "key %d is of type sortAttributes", i+1);
956 /* for every database we searched, get the sort index file
957 id (ord) and its numerical indication and store them in
959 for (ib = 0; ib < numbases; ib++)
961 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
962 res = zebra_sort_get_ord(zh, sk->u.sortAttributes,
963 &sort_criteria[i].ord[ib],
964 &sort_criteria[i].numerical[ib]);
967 if (sks->which != Z_SortKeySpec_null && res != ZEBRA_OK)
971 /* right now we look up the index type based on the first database
972 if the index_type's can differ between the indexes of different
973 databases (which i guess they can?) then we have to store the
974 index types for each database, just like the ord and numerical */
975 if (zebraExplain_lookup_ord(zh->reg->zei, sort_criteria[i].ord[0],
976 &sort_criteria[i].index_type,
979 zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
983 /* allocate space for each compare buf + one extra for tmp comparison */
984 /* cmp_buf is an array of array, the first dimension is the criteria and the second dimension are
985 all other result entries to compare against. This is slowly filled when records are processed.
986 tmp_cmp_buf is an array with a value of the current record for each criteria
988 for (i = 0; i<num_criteria; i++)
990 cmp_buf[i] = xmalloc(sset->sort_info->max_entries
991 * SORT_IDX_ENTRYSIZE);
992 tmp_cmp_buf[i] = xmalloc(SORT_IDX_ENTRYSIZE);
994 rfd = rset_open(rset, RSETF_READ);
995 while (rset_read(rfd, &key, &termid))
997 zint this_sys = key.mem[sysno_mem_index];
998 if (log_level_searchhits)
999 key_logdump_txt(log_level_searchhits, &key, termid->name);
1001 if (this_sys != psysno)
1003 int database_no = 0;
1004 if ((sset->hits & 255) == 0 && zh->break_handler_func)
1006 if (zh->break_handler_func(zh->break_handler_data))
1008 rset_set_hits_limit(rset, 0);
1015 /* determine database from the term, but only bother if more than
1016 one database is in use*/
1017 if (numbases > 1 && termid->ol)
1019 const char *this_db = 0;
1020 if (zebraExplain_lookup_ord(zh->reg->zei, termid->ol->ord, 0, &this_db, 0)
1023 for (ib = 0; ib < numbases; ib++)
1024 if (!strcmp(this_db, zh->basenames[ib]))
1029 yaz_log(YLOG_LOG, "sysno=" ZINT_FORMAT " database_no=%d", this_sys,
1031 ord_list_print(termid->ol);
1033 resultSetInsertSort(zh, sset, database_no,
1034 sort_criteria, num_criteria, psysno, cmp_buf,
1040 /* free the compare buffers */
1041 for (i = 0; i<num_criteria; i++)
1044 xfree(tmp_cmp_buf[i]);
1047 yaz_log(log_level_sort, ZINT_FORMAT " keys, " ZINT_FORMAT " sysnos, sort",
1049 for (i = 0; i < numTerms; i++)
1050 yaz_log(log_level_sort, "term=\"%s\" type=%s count=" ZINT_FORMAT,
1051 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
1052 *sort_status = Z_SortResponse_success;
/* Fetch the result set resultSetId through resultSetGet().
   NOTE(review): the value returned is not visible in this excerpt;
   presumably it is the set's RSET - confirm in the full source. */
1056 RSET resultSetRef(ZebraHandle zh, const char *resultSetId)
1060 if ((s = resultSetGet(zh, resultSetId)))
/* Rank the records of rset into zebraSet->sort_info.
   The rank handler is named by the "rank" resource (default "rank-1") and
   resolved with zebraRankLookup(); a missing handler is logged and reported
   as YAZ_BIB1_UNSUPP_SEARCH.
   With a handler: begin() is called, then the rset is read key by key. The
   sysno is taken from key.mem[1] in static-rank mode and key.mem[0]
   otherwise. On each change of sysno the break handler is polled when the
   low 8 bits of rfd->counted_items are zero, counted_items is tested
   against rset->hits_limit, and the previous record is scored with calc()
   and inserted with resultSetInsertRank(). Every key is passed to add()
   with its sequence number (the last key component). After the loop the
   final record is scored and inserted and end() is called.
   Finally zebraSet->hits takes rset->hits_count and term statistics are
   logged.
   NOTE(review): the branch bodies for the break handler and the hits limit,
   the condition that sets estimated_hit_count, and the return statements
   are not visible in this excerpt. */
1065 ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet,
1066 RSET rset, NMEM nmem)
1075 ZebraRankClass rank_class;
1076 struct zset_sort_info *sort_info;
1077 const char *rank_handler_name = res_get_def(zh->res, "rank", "rank-1");
1078 size_t sysno_mem_index = 0;
1080 if (zh->m_staticrank)
1081 sysno_mem_index = 1;
1085 sort_info = zebraSet->sort_info;
1086 sort_info->num_entries = 0;
1088 zebraSet->estimated_hit_count = 0;
1089 rset_getterms(rset, 0, 0, &n);
1090 terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
1091 rset_getterms(rset, terms, n, &numTerms);
1093 rank_class = zebraRankLookup(zh, rank_handler_name);
1096 yaz_log(YLOG_WARN, "No such rank handler: %s", rank_handler_name);
1097 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "Cannot find rank handler");
1102 RSFD rfd = rset_open(rset, RSETF_READ);
1103 struct rank_control *rc = rank_class->control;
1106 void *handle = (*rc->begin) (zh->reg, rank_class->class_handle, rset,
1107 nmem, terms, numTerms);
1108 zint psysno = 0; /* previous doc id / sys no */
1109 zint pstaticrank = 0; /* previous static rank */
1111 while (rset_read(rfd, &key, &termid))
1113 zint this_sys = key.mem[sysno_mem_index];
1115 zint seqno = key.mem[key.len-1];
1117 if (log_level_searchhits)
1118 key_logdump_txt(log_level_searchhits, &key, termid->name);
1119 if (this_sys != psysno)
1120 { /* new record .. */
1121 if (!(rfd->counted_items & 255) && zh->break_handler_func)
1123 if (zh->break_handler_func(zh->break_handler_data))
1125 yaz_log(YLOG_LOG, "Aborted search");
1129 if (rfd->counted_items > rset->hits_limit)
1132 { /* only if we did have a previous record */
1133 score = (*rc->calc)(handle, psysno, pstaticrank,
1135 /* insert the hit. A=Ascending */
1136 resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1141 zebraSet->estimated_hit_count = 1;
1142 rset_set_hits_limit(rset, 0);
1146 if (zh->m_staticrank)
1147 pstaticrank = key.mem[0];
1149 (*rc->add)(handle, CAST_ZINT_TO_INT(seqno), termid);
1153 { /* we had - at least - one record */
1154 score = (*rc->calc)(handle, psysno, pstaticrank, &stop_flag);
1155 /* insert the hit. A=Ascending */
1156 resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1159 (*rc->end)(zh->reg, handle);
1162 zebraSet->hits = rset->hits_count;
1164 yaz_log(log_level_searchterms, ZINT_FORMAT " keys, "
1165 ZINT_FORMAT " sysnos, rank", kno, zebraSet->hits);
1166 for (i = 0; i < numTerms; i++)
1168 yaz_log(log_level_searchterms, "term=\"%s\" type=%s count="
1170 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
/* Find the rank class whose control->name equals name in
   zh->reg->rank_classes. For a class found with init_flag clear, the create
   hook - when present - is called with zh and its result is stored in
   class_handle.
   NOTE(review): the list advance, the update of init_flag and the return
   statement are not visible in this excerpt. */
1175 ZebraRankClass zebraRankLookup(ZebraHandle zh, const char *name)
1177 ZebraRankClass p = zh->reg->rank_classes;
1178 while (p && strcmp(p->control->name, name))
1180 if (p && !p->init_flag)
1182 if (p->control->create)
1183 p->class_handle = (*p->control->create)(zh);
/* Register a rank handler with reg: a new ZebraRankClass is allocated, it
   receives a private copy of *ctrl whose name is an xstrdup() duplicate, and
   it is pushed on the front of reg->rank_classes.
   NOTE(review): initialisation of the remaining members is not visible in
   this excerpt. */
1189 void zebraRankInstall(struct zebra_register *reg, struct rank_control *ctrl)
1191 ZebraRankClass p = (ZebraRankClass) xmalloc(sizeof(*p));
1192 p->control = (struct rank_control *) xmalloc(sizeof(*p->control));
1193 memcpy(p->control, ctrl, sizeof(*p->control));
1194 p->control->name = xstrdup(ctrl->name);
1196 p->next = reg->rank_classes;
1197 reg->rank_classes = p;
/* Tear down the rank classes of reg: for each class the destroy hook is
   called when the class was initialised and provides one, and the duplicated
   name is freed; finally reg->rank_classes is set to NULL.
   NOTE(review): the loop header and the freeing of the class records
   themselves are not visible in this excerpt. */
1200 void zebraRankDestroy(struct zebra_register *reg)
1202 ZebraRankClass p = reg->rank_classes;
1205 ZebraRankClass p_next = p->next;
1206 if (p->init_flag && p->control->destroy)
1207 (*p->control->destroy)(reg, p->class_handle);
1208 xfree(p->control->name);
1213 reg->rank_classes = NULL;
/* Walk the rset tree and collect per-term information.
   The function recurses into every child; each non-NULL output array is
   passed on advanced by no, the running total accumulated from the recursive
   calls. For a term it stores rset->term, rset->hits_count and
   rset->hits_approx at index no and logs the term.
   Any of the three arrays may be NULL; callers pass all three as NULL to
   obtain only the count.
   NOTE(review): the conditions separating inner nodes from terms and the
   return statement are not visible in this excerpt. */
1216 static int trav_rset_for_termids(RSET rset, TERMID *termid_array,
1217 zint *hits_array, int *approx_array)
1221 for (i = 0; i<rset->no_children; i++)
1222 no += trav_rset_for_termids(rset->children[i],
1223 (termid_array ? termid_array + no : 0),
1224 (hits_array ? hits_array + no : 0),
1225 (approx_array ? approx_array + no : 0));
1229 termid_array[no] = rset->term;
1231 hits_array[no] = rset->hits_count;
1233 approx_array[no] = rset->hits_approx;
1235 yaz_log(YLOG_LOG, "rset=%p term=%s limit=" ZINT_FORMAT
1236 " count=" ZINT_FORMAT,
1237 rset, rset->term->name, rset->hits_limit, rset->hits_count);
/* Store in *num_terms the number of terms of result set setname, counted by
   trav_rset_for_termids() with all output arrays NULL.
   NOTE(review): handling of a missing set and the return values are not
   visible in this excerpt. */
1244 ZEBRA_RES zebra_result_set_term_no(ZebraHandle zh, const char *setname,
1247 ZebraSet sset = resultSetGet(zh, setname);
1251 *num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
/* Report details of term number no (0-based) of result set setname.
   The terms are counted first; for a valid no three temporary arrays are
   filled by trav_rset_for_termids() and the outputs are written:
   *count (hits), *approx, the term text in termbuf and *term_ref_id.
   Term text: *termlen is the buffer size on input and the stored length on
   output; one byte is reserved for the terminating NUL. With
   zh->iconv_from_utf8 set the text is converted through yaz_iconv(),
   otherwise it is copied with memcpy().
   NOTE(review): outleft is computed as *termlen - 1 in size_t, so a buffer
   size of 0 wraps around - confirm callers always pass at least 1. The
   guards on the optional outputs, the truncation branch and the return
   values are not visible in this excerpt. */
1257 ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname,
1258 int no, zint *count, int *approx,
1259 char *termbuf, size_t *termlen,
1260 const char **term_ref_id)
1262 ZebraSet sset = resultSetGet(zh, setname);
1265 int num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1266 if (no >= 0 && no < num_terms)
1268 TERMID *term_array = xmalloc(num_terms * sizeof(*term_array));
1269 zint *hits_array = xmalloc(num_terms * sizeof(*hits_array));
1270 int *approx_array = xmalloc(num_terms * sizeof(*approx_array));
1272 trav_rset_for_termids(sset->rset, term_array,
1273 hits_array, approx_array);
1276 *count = hits_array[no];
1278 *approx = approx_array[no];
1281 char *inbuf = term_array[no]->name;
1282 size_t inleft = strlen(inbuf);
1283 size_t outleft = *termlen - 1;
1285 if (zh->iconv_from_utf8 != 0)
1287 char *outbuf = termbuf;
1290 ret = yaz_iconv(zh->iconv_from_utf8, &inbuf, &inleft,
1292 if (ret == (size_t)(-1))
1296 yaz_iconv(zh->iconv_from_utf8, 0, 0,
1298 *termlen = outbuf - termbuf;
1303 if (inleft > outleft)
1306 memcpy(termbuf, inbuf, *termlen);
1308 termbuf[*termlen] = '\0';
1311 *term_ref_id = term_array[no]->ref_id;
1315 xfree(approx_array);
/* Collect snippet entries for one record of result set setname.
   A temporary rset (placed in the "setTmpDir" resource directory) is
   written with a key, then AND-combined with a duplicate of the set's rset.
   Every key read from the combination contributes one
   zebra_snippets_append() call per entry of the term's ord list, using the
   last key component, the ord and the term name.
   The combined rset is deleted afterwards.
   NOTE(review): construction of the key from sysno, closing of the RSFDs,
   release of nmem and kc, and the return values are not visible in this
   excerpt. */
1322 ZEBRA_RES zebra_snippets_hit_vector(ZebraHandle zh, const char *setname,
1323 zint sysno, zebra_snippets *snippets)
1325 ZebraSet sset = resultSetGet(zh, setname);
1326 yaz_log(YLOG_DEBUG, "zebra_get_hit_vector setname=%s zysno=" ZINT_FORMAT,
1332 struct rset_key_control *kc = zebra_key_control_create(zh);
1333 NMEM nmem = nmem_create();
1335 RSET rsets[2], rset_comb;
1336 RSET rset_temp = rset_create_temp(nmem, kc, kc->scope,
1337 res_get(zh->res, "setTmpDir"),0 );
1340 RSFD rsfd = rset_open(rset_temp, RSETF_WRITE);
1347 rset_write(rsfd, &key);
1350 rsets[0] = rset_temp;
1351 rsets[1] = rset_dup(sset->rset);
1353 rset_comb = rset_create_and(nmem, kc, kc->scope, 2, rsets);
1355 rsfd = rset_open(rset_comb, RSETF_READ);
1357 while (rset_read(rsfd, &key, &termid))
1361 struct ord_list *ol;
1362 for (ol = termid->ol; ol; ol = ol->next)
1364 zebra_snippets_append(snippets, key.mem[key.len-1], 0,
1365 ol->ord, termid->name);
1371 rset_delete(rset_comb);
/* Map record id recid to the system numbers stored for it.
   On entry *no_sysnos is the capacity of sysnos; on exit it is the number
   of entries written.
   Without an isamb register or without segment indexing a short-cut branch
   is taken. Otherwise, for each database the "_ALLRECORDS" always-matches
   index (index type "w") is looked up, its ord is encoded as a dictionary
   key, and the ISAM position found in the dictionary is scanned with
   isamb_pp_forward() and isamb_pp_read() for as long as the first key
   component equals recid; the last key component of each match is stored
   while capacity remains.
   NOTE(review): the body of the short-cut branch, the error branches
   (lookup failure, unexpected info size), closing of the isamb position and
   the return statement are not visible in this excerpt. */
1378 static ZEBRA_RES zebra_recid_to_sysno(ZebraHandle zh,
1379 const char **basenames, int num_bases,
1381 zint *sysnos, int *no_sysnos)
1383 ZEBRA_RES res = ZEBRA_OK;
1384 int sysnos_offset = 0;
1387 if (!zh->reg->isamb || !zh->m_segment_indexing)
1389 if (sysnos_offset < *no_sysnos)
1395 for (i = 0; res == ZEBRA_OK && i < num_bases; i++)
1397 const char *database = basenames[i];
1398 if (zebraExplain_curDatabase(zh->reg->zei, database) == 0)
1400 const char *index_type = "w";
1401 const char *use_string = "_ALLRECORDS";
1403 zinfo_index_category_t cat = zinfo_index_category_alwaysmatches;
1404 ord = zebraExplain_lookup_attr_str(zh->reg->zei, cat,
1405 index_type, use_string);
1409 int ord_len = key_SU_encode(ord, ord_buf);
1412 ord_buf[ord_len] = '\0';
1414 info = dict_lookup(zh->reg->dict, ord_buf);
1417 if (*info != sizeof(ISAM_P))
1425 struct it_key key_until, key_found;
1429 memcpy(&isam_p, info+1, sizeof(ISAM_P));
1431 pt = isamb_pp_open(zh->reg->isamb, isam_p, 2);
1436 key_until.mem[i++] = recid;
1437 key_until.mem[i++] = 0; /* section_id */
1438 if (zh->m_segment_indexing)
1439 key_until.mem[i++] = 0; /* segment */
1440 key_until.mem[i++] = 0;
1443 r = isamb_pp_forward(pt, &key_found, &key_until);
1444 while (r && key_found.mem[0] == recid)
1446 if (sysnos_offset < *no_sysnos)
1447 sysnos[sysnos_offset++] =
1448 key_found.mem[key_found.len-1];
1449 r = isamb_pp_read(pt, &key_found);
1459 *no_sysnos = sysnos_offset;
/* Map recid to system numbers using the databases of result set setname.
   The database names are obtained with resultSetGetBaseNames(); a status
   other than ZEBRA_OK is tested before the work is delegated to
   zebra_recid_to_sysno(), whose result is returned.
   NOTE(review): the statement executed on the failure branch is not visible
   in this excerpt. */
1463 ZEBRA_RES zebra_result_recid_to_sysno(ZebraHandle zh,
1464 const char *setname,
1466 zint *sysnos, int *no_sysnos)
1468 const char **basenames;
1472 res = resultSetGetBaseNames(zh, setname, &basenames, &num_bases);
1473 if (res != ZEBRA_OK)
1476 return zebra_recid_to_sysno(zh, basenames, num_bases,
1477 recid, sysnos, no_sysnos);
/* Count the hits of rset and store rset->hits_count in *count.
   rset->hits_limit is set to approx_limit, the rset is read without term
   information, and on every change of key.mem[0] the number of counted
   items is tested against the hits limit.
   NOTE(review): the record id is read from key.mem[0] here, with no
   static-rank offset as used in resultSetRank() - confirm this is intended.
   The body of the limit test and the closing of rfd are not visible in
   this excerpt. */
1480 void zebra_count_set(ZebraHandle zh, RSET rset, zint *count,
1487 yaz_log(YLOG_DEBUG, "count_set");
1489 rset->hits_limit = approx_limit;
1492 rfd = rset_open(rset, RSETF_READ);
1493 while (rset_read(rfd, &key,0 /* never mind terms */))
1495 if (key.mem[0] != psysno)
1497 psysno = key.mem[0];
1498 if (rfd->counted_items >= rset->hits_limit)
1503 *count = rset->hits_count;
1510 * indent-tabs-mode: nil
1512 * vim: shiftwidth=4 tabstop=8 expandtab