1 /* This file is part of the Zebra server.
2 Copyright (C) 1995-2008 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
31 #include <yaz/diagbib1.h>
34 #define ZSET_SORT_MAX_LEVEL 10
36 struct zebra_set_term_entry {
47 NMEM rset_nmem; /* for creating the rsets in */
50 const char **basenames;
52 Z_SortKeySpecList *sortSpec;
53 struct zset_sort_info *sort_info;
54 struct zebra_set_term_entry *term_entries;
56 struct zebra_set *next;
58 int estimated_hit_count;
60 zint cache_position; /* last position */
61 RSFD cache_rfd; /* rfd (NULL if not existing) */
62 zint cache_psysno; /* sysno for last position */
63 zint approx_limit; /* limit before we do approx */
66 struct zset_sort_entry {
71 struct zset_sort_info {
74 struct zset_sort_entry *all_entries;
75 struct zset_sort_entry **entries;
78 static int log_level_set=0;
79 static int log_level_sort=0;
80 static int log_level_searchhits=0;
81 static int log_level_searchterms=0;
82 static int log_level_resultsets=0;
/* Resolve the yaz log-module levels for the categories "sorting",
   "searchhits", "searchterms" and "resultsets" and cache them in the
   file-scope log_level_* variables. */
84 static void loglevels(void)
88 log_level_sort = yaz_log_module_level("sorting");
89 log_level_searchhits = yaz_log_module_level("searchhits");
90 log_level_searchterms = yaz_log_module_level("searchterms");
91 log_level_resultsets = yaz_log_module_level("resultsets");
/* Run the RPN query for result set sset and order the hits.
   A sort-spec list with 10 slots (all set to 0) is allocated from nmem.
   After rpn_search_top has run, the leading non-zero slots are counted and
   stored as num_specs, and the rset's hits_limit is set from
   sset->approx_limit.  The result is then handed to resultSetRank and/or
   resultSetSortSingle.
   NOTE(review): the loop that counts the spec slots has no upper bound; it
   relies on at least one of the 10 slots staying 0 - confirm.
   NOTE(review): which of rank / sort is chosen, and when, should be
   confirmed against the complete function. */
96 static ZEBRA_RES resultSetSearch(ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
97 Z_RPNQuery *rpn, ZebraSet sset)
100 Z_SortKeySpecList *sort_sequence;
102 ZEBRA_RES res = ZEBRA_OK;
104 sort_sequence = (Z_SortKeySpecList *)
105 nmem_malloc(nmem, sizeof(*sort_sequence));
106 sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
107 sort_sequence->specs = (Z_SortKeySpec **)
108 nmem_malloc(nmem, sort_sequence->num_specs *
109 sizeof(*sort_sequence->specs));
110 for (i = 0; i<sort_sequence->num_specs; i++)
111 sort_sequence->specs[i] = 0;
113 rpn_get_top_approx_limit(zh, rpn->RPNStructure, &sset->approx_limit);
115 res = rpn_search_top(zh, rpn->RPNStructure, rpn->attributeSetId,
118 sset->num_bases, sset->basenames,
125 for (i = 0; sort_sequence->specs[i]; i++)
127 sort_sequence->num_specs = i;
128 rset->hits_limit = sset->approx_limit;
131 res = resultSetRank(zh, sset, rset, rset_nmem);
135 res = resultSetSortSingle(zh, nmem, sset, rset,
136 sort_sequence, &sort_status);
/* Create (or update) the result set `setname` and run an RPN search for it.
   The set is obtained with resultSetAdd(..., 1) and marked locked while the
   search runs.  The database names are duplicated into the set's own nmem
   and a fresh rset_nmem is created for the rsets.
   On return *hits holds the set's hit count; *estimated_hit_count is 1 when
   the set flagged its count as estimated and 0 otherwise. */
143 ZEBRA_RES resultSetAddRPN(ZebraHandle zh, NMEM m, Z_RPNQuery *rpn,
144 int num_bases, char **basenames,
146 zint *hits, int *estimated_hit_count)
153 *estimated_hit_count = 0;
155 zebraSet = resultSetAdd(zh, setname, 1);
158 zebraSet->locked = 1;
161 zebraSet->rset_nmem = nmem_create();
163 zebraSet->num_bases = num_bases;
164 zebraSet->basenames =
165 nmem_malloc(zebraSet->nmem, num_bases * sizeof(*zebraSet->basenames));
166 for (i = 0; i<num_bases; i++)
167 zebraSet->basenames[i] = nmem_strdup(zebraSet->nmem, basenames[i]);
169 res = resultSetSearch(zh, zebraSet->nmem, zebraSet->rset_nmem,
171 *hits = zebraSet->hits;
172 if (zebraSet->estimated_hit_count)
173 *estimated_hit_count = 1;
177 zebraSet->locked = 0;
/* Record one term entry (reg_type, db, index_name, term) in result set s.
   The entry table is allocated from s->nmem on first use with room for 1000
   entries, each with its term pointer cleared.  An entry is stored at index
   s->hits only while s->hits is below that capacity; its strings are
   duplicated into s->nmem. */
183 void resultSetAddTerm(ZebraHandle zh, ZebraSet s, int reg_type,
184 const char *db, const char *index_name,
187 assert(zh); /* compiler shut up */
189 s->nmem = nmem_create();
190 if (!s->term_entries)
193 s->term_entries_max = 1000;
195 nmem_malloc(s->nmem, s->term_entries_max *
196 sizeof(*s->term_entries));
197 for (i = 0; i < s->term_entries_max; i++)
198 s->term_entries[i].term = 0;
200 if (s->hits < s->term_entries_max)
202 s->term_entries[s->hits].reg_type = reg_type;
203 s->term_entries[s->hits].db = nmem_strdup(s->nmem, db);
204 s->term_entries[s->hits].index_name = nmem_strdup(s->nmem, index_name);
205 s->term_entries[s->hits].term = nmem_strdup(s->nmem, term);
/* Find result set `name` in zh->sets, or create it.
   For an existing set ("updating result set") the request is checked against
   `!ov || s->locked`; otherwise the set's cached rfd, rset and nmem pools
   are released so it can be refilled.
   NOTE(review): the `!ov || s->locked` branch presumably rejects the
   request (returns NULL) - confirm.
   A new set ("adding result set") gets its name duplicated and sort_info
   arrays sized from the "sortmax" resource (default "1000", minimum 2).
   approx_limit is inherited from zh. */
210 ZebraSet resultSetAdd(ZebraHandle zh, const char *name, int ov)
215 for (s = zh->sets; s; s = s->next)
216 if (!strcmp(s->name, name))
223 yaz_log(log_level_resultsets, "updating result set %s", name);
224 if (!ov || s->locked)
229 rset_close(s->cache_rfd);
230 rset_delete(s->rset);
233 nmem_destroy(s->rset_nmem);
235 nmem_destroy(s->nmem);
239 const char *sort_max_str = zebra_get_resource(zh, "sortmax", "1000");
241 yaz_log(log_level_resultsets, "adding result set %s", name);
242 s = (ZebraSet) xmalloc(sizeof(*s));
245 s->name = xstrdup(name);
247 s->sort_info = (struct zset_sort_info *)
248 xmalloc(sizeof(*s->sort_info));
249 s->sort_info->max_entries = atoi(sort_max_str);
250 if (s->sort_info->max_entries < 2)
251 s->sort_info->max_entries = 2;
253 s->sort_info->entries = (struct zset_sort_entry **)
254 xmalloc(sizeof(*s->sort_info->entries) *
255 s->sort_info->max_entries);
256 s->sort_info->all_entries = (struct zset_sort_entry *)
257 xmalloc(sizeof(*s->sort_info->all_entries) *
258 s->sort_info->max_entries);
/* initially entries[i] points at the i-th element of all_entries */
259 for (i = 0; i < s->sort_info->max_entries; i++)
260 s->sort_info->entries[i] = s->sort_info->all_entries + i;
270 s->cache_position = 0;
272 s->approx_limit = zh->approx_limit;
273 s->estimated_hit_count = 0;
/* Find result set `name` in zh->sets.
   If the set has a stored RPN query but neither term entries nor an rset,
   the search is executed again ("research") using a temporary nmem and a
   fresh rset_nmem; when that yields an rset and a sort spec was stored, the
   set is sorted again ("resort"). */
277 ZebraSet resultSetGet(ZebraHandle zh, const char *name)
281 for (s = zh->sets; s; s = s->next)
282 if (!strcmp(s->name, name))
284 if (!s->term_entries && !s->rset && s->rpn)
286 NMEM nmem = nmem_create();
287 yaz_log(log_level_resultsets, "research %s", name);
289 s->rset_nmem = nmem_create();
290 resultSetSearch(zh, nmem, s->rset_nmem, s->rpn, s);
291 if (s->rset && s->sortSpec)
294 yaz_log(log_level_resultsets, "resort %s", name);
295 resultSetSortSingle(zh, nmem, s, s->rset, s->sortSpec,
/* Look up result set `setname` and hand back its database name array and
   count through *basenames and *num_bases.  The array is the set's own
   (not a copy). */
305 ZEBRA_RES resultSetGetBaseNames(ZebraHandle zh, const char *setname,
306 const char ***basenames, int *num_bases)
308 ZebraSet sset = resultSetGet(zh, setname);
311 *basenames = sset->basenames;
312 *num_bases = sset->num_bases;
/* Walk every result set of zh and drop its search state: the cached rfd is
   closed, the rset deleted, the cache position reset to 0 and the rset_nmem
   pool destroyed.  The set objects themselves stay in the list. */
317 void resultSetInvalidate(ZebraHandle zh)
319 ZebraSet s = zh->sets;
321 yaz_log(log_level_resultsets, "invalidating result sets");
322 for (; s; s = s->next)
327 rset_close(s->cache_rfd);
328 rset_delete(s->rset);
332 s->cache_position = 0;
334 nmem_destroy(s->rset_nmem);
/* Delete result sets by name.
   statuses[i] starts as Z_DeleteStatus_resultSetDidNotExist for each of the
   `num` names and becomes Z_DeleteStatus_success when names[i] matches an
   existing set.  A removed set has its sort_info arrays, nmem pools, cached
   rfd and rset released. */
339 void resultSetDestroy(ZebraHandle zh, int num, char **names,int *statuses)
341 ZebraSet * ss = &zh->sets;
345 for (i = 0; i<num; i++)
346 statuses[i] = Z_DeleteStatus_resultSetDidNotExist;
353 for (i = 0; i<num; i++)
354 if (!strcmp(s->name, names[i]))
357 statuses[i] = Z_DeleteStatus_success;
366 xfree(s->sort_info->all_entries);
367 xfree(s->sort_info->entries);
371 nmem_destroy(s->nmem);
375 rset_close(s->cache_rfd);
376 rset_delete(s->rset);
379 nmem_destroy(s->rset_nmem);
/* Fetch meta records for `num` positions of a result set by filling a
   position array and calling zebra_meta_records_create.
   A small local array (pos_small) is the default storage; a heap array of
   `num` elements can be allocated instead.
   NOTE(review): num <= 0 or num > 10000 is special-cased, presumably
   rejected - confirm. */
388 ZebraMetaRecord *zebra_meta_records_create_range(ZebraHandle zh,
393 zint *pos = pos_small;
397 if (num > 10000 || num <= 0)
401 pos = xmalloc(sizeof(*pos) * num);
403 for (i = 0; i<num; i++)
406 mr = zebra_meta_records_create(zh, name, num, pos);
/* Build an array of `num` ZebraMetaRecord entries for the given 1-based
   positions in result set `name`.
   - When the set has no rset, records are filled from sset->term_entries
     (term and db) for positions up to term_entries_max.
   - Otherwise positions within sort_info->num_entries are answered from the
     sorted entries (sysno and score).  Positions beyond that are found by
     reading the rset sequentially, skipping sysnos already present in the
     sorted entries; those records get score -1.
   The read position, last sysno and open rfd are cached in the set so that a
   later call asking for higher positions can continue from there. */
413 ZebraMetaRecord *zebra_meta_records_create(ZebraHandle zh, const char *name,
414 int num, zint *positions)
417 ZebraMetaRecord *sr = 0;
420 struct zset_sort_info *sort_info;
421 size_t sysno_mem_index = 0;
423 if (zh->m_staticrank)
428 if (!(sset = resultSetGet(zh, name)))
430 if (!(rset = sset->rset))
432 if (!sset->term_entries)
434 sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
435 for (i = 0; i<num; i++)
442 if (positions[i] <= sset->term_entries_max)
444 sr[i].term = sset->term_entries[positions[i]-1].term;
445 sr[i].db = sset->term_entries[positions[i]-1].db;
451 sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
452 for (i = 0; i<num; i++)
459 sort_info = sset->sort_info;
464 for (i = 0; i<num; i++)
466 position = positions[i];
467 if (position > 0 && position <= sort_info->num_entries)
469 yaz_log(log_level_sort, "got pos=" ZINT_FORMAT
470 " (sorted)", position);
471 sr[i].sysno = sort_info->entries[position-1]->sysno;
472 sr[i].score = sort_info->entries[position-1]->score;
476 /* did we really get all entries using sort ? */
477 for (i = 0; i<num; i++)
482 if (i < num) /* nope, get the rest, unsorted - sorry */
491 position = sort_info->num_entries;
492 while (num_i < num && positions[num_i] <= position)
/* continue from the cached reader only if the next wanted position lies
   beyond the cached one */
495 if (sset->cache_rfd &&
496 num_i < num && positions[num_i] > sset->cache_position)
498 position = sset->cache_position;
499 rfd = sset->cache_rfd;
500 psysno = sset->cache_psysno;
505 rset_close(sset->cache_rfd);
506 rfd = rset_open(rset, RSETF_READ);
508 while (num_i < num && rset_read(rfd, &key, 0))
510 zint this_sys = key.mem[sysno_mem_index];
511 if (this_sys != psysno)
516 /* determine whether we already have this in our set */
517 for (i = sort_info->num_entries; --i >= 0; )
518 if (psysno == sort_info->entries[i]->sysno)
525 if (position == positions[num_i])
527 sr[num_i].sysno = psysno;
528 yaz_log(log_level_sort, "got pos=" ZINT_FORMAT " (unsorted)", position);
529 sr[num_i].score = -1;
/* remember where we stopped for the next call */
534 sset->cache_position = position;
535 sset->cache_psysno = psysno;
536 sset->cache_rfd = rfd;
/* Counterpart of zebra_meta_records_create for a `records` array.
   zh is only asserted, not otherwise used.
   NOTE(review): presumably frees `records` - confirm. */
542 void zebra_meta_records_destroy(ZebraHandle zh, ZebraMetaRecord *records,
545 assert(zh); /* compiler shut up about unused arg */
551 int *ord; /* array of ord for each database searched */
552 int *numerical; /* per-database flag: non-zero means compare this key numerically */
553 const char *index_type; /* index type looked up from ord[0], i.e. the first database */
/* Insert record `sysno` into the sorted entry list of sset.
   For every criterion the record's sort key is read from the sort index
   (using the ord registered for database_no) into tmp_cmp_buf[i]; a
   criterion whose ord is -1 is not read and keeps a zeroed key.
   The key is then compared with the keys already stored in cmp_buf, either
   numerically (after zebra_term_untrans and atof) or with memcmp, to find
   the insert position.  Higher entries and their keys are moved up, and the
   new key is copied into cmp_buf.  The inserted entry gets score -1.
   cmp_buf[k] holds SORT_IDX_ENTRYSIZE bytes per entry for criterion k. */
556 void resultSetInsertSort(ZebraHandle zh, ZebraSet sset,
558 struct sortKeyInfo *criteria, int num_criteria,
560 char *cmp_buf[], char *tmp_cmp_buf[])
562 struct zset_sort_entry *new_entry = NULL;
563 struct zset_sort_info *sort_info = sset->sort_info;
565 WRBUF w = wrbuf_alloc();
567 zebra_sort_sysno(zh->reg->sort_index, sysno);
568 for (i = 0; i<num_criteria; i++)
570 char *this_entry_buf = tmp_cmp_buf[i];
571 memset(this_entry_buf, '\0', SORT_IDX_ENTRYSIZE);
573 if (criteria[i].ord[database_no] != -1)
575 yaz_log(log_level_sort, "pre zebra_sort_type ord is %d",
576 criteria[i].ord[database_no]);
577 zebra_sort_type(zh->reg->sort_index, criteria[i].ord[database_no]);
579 if (zebra_sort_read(zh->reg->sort_index, w))
/* w may hold several NUL-terminated values back to back: keep the
   smallest one for an ascending ('A') key and the largest one for a
   descending ('D') key */
582 while (off != wrbuf_len(w))
584 assert(off < wrbuf_len(w));
586 strcpy(this_entry_buf, wrbuf_buf(w));
587 else if (criteria[i].relation == 'A')
589 if (strcmp(wrbuf_buf(w)+off, this_entry_buf) < 0)
590 strcpy(this_entry_buf, wrbuf_buf(w)+off);
592 else if (criteria[i].relation == 'D')
594 if (strcmp(wrbuf_buf(w)+off, this_entry_buf) > 0)
595 strcpy(this_entry_buf, wrbuf_buf(w)+off);
597 off += 1 + strlen(wrbuf_buf(w)+off);
603 yaz_log(log_level_sort, "criteria[i].ord is -1 so not reading from sort index");
607 i = sort_info->num_entries;
611 for (j = 0; j<num_criteria; j++)
613 char *this_entry_buf = tmp_cmp_buf[j];
614 char *other_entry_buf =
615 cmp_buf[j] + i * SORT_IDX_ENTRYSIZE;
616 if (criteria[j].numerical[database_no])
618 char this_entry_org[1024];
619 char other_entry_org[1024];
621 /* when searching multiple databases, we use the index
622 type of the first one. So if they differ between
623 databases, we have a problem here we could store the
624 index_type for each database, but if we didn't find the
625 record in any sort index, then we still don't know to
626 which database it belongs. */
627 const char *index_type = criteria[j].index_type;
628 zebra_term_untrans(zh, index_type, this_entry_org,
630 zebra_term_untrans(zh, index_type, other_entry_org,
632 diff = atof(this_entry_org) - atof(other_entry_org);
643 rel = memcmp(this_entry_buf, other_entry_buf,
646 /* when the compare is equal, continue to next criteria,
653 if (criteria[j].relation == 'A')
658 else if (criteria[j].relation == 'D')
665 yaz_log(log_level_sort, "ok, we want to insert record at position %d",i);
666 j = sort_info->max_entries;
668 yaz_log(log_level_sort, "sort_info->max_entries reached (%d) abort sort",j);
672 if (sort_info->num_entries == j)
675 j = (sort_info->num_entries)++;
676 new_entry = sort_info->entries[j];
677 /* move up all higher entries (to make room) */
681 for (k = 0; k<num_criteria; k++)
683 char *j_buf = cmp_buf[k] + j * SORT_IDX_ENTRYSIZE;
684 char *j_1_buf = cmp_buf[k] + (j-1) * SORT_IDX_ENTRYSIZE;
685 memcpy(j_buf, j_1_buf, SORT_IDX_ENTRYSIZE);
687 sort_info->entries[j] = sort_info->entries[j-1];
690 /* and insert the new entry at the correct place */
691 sort_info->entries[i] = new_entry;
693 /* and add this to the compare buffer */
694 for (i = 0; i<num_criteria; i++)
696 char *new_entry_buf = cmp_buf[i] + j * SORT_IDX_ENTRYSIZE;
697 char *this_entry_buf = tmp_cmp_buf[i];
698 memcpy(new_entry_buf, this_entry_buf, SORT_IDX_ENTRYSIZE);
700 new_entry->sysno = sysno;
701 new_entry->score = -1;
/* Insert the pair (sysno, score) into sort_info's entry list.
   The insert position is searched starting from num_entries by comparing
   `score` with the scores of existing entries.  Entries above the position
   are shifted up by one and the freed slot receives the new sysno and score.
   max_entries bounds the list.
   NOTE(review): `relation` ('A' is one handled value) presumably selects the
   ordering direction - confirm.
   zh is only asserted, not otherwise used. */
704 void resultSetInsertRank(ZebraHandle zh, struct zset_sort_info *sort_info,
705 zint sysno, int score, int relation)
707 struct zset_sort_entry *new_entry = NULL;
709 assert(zh); /* compiler shut up about unused arg */
711 i = sort_info->num_entries;
716 rel = score - sort_info->entries[i]->score;
723 else if (relation == 'A')
730 j = sort_info->max_entries;
734 if (sort_info->num_entries == j)
737 j = (sort_info->num_entries)++;
739 new_entry = sort_info->entries[j];
742 sort_info->entries[j] = sort_info->entries[j-1];
745 sort_info->entries[i] = new_entry;
747 new_entry->sysno = sysno;
748 new_entry->score = score;
/* Deep-copy an RPN query by round-tripping it through ODR: src is encoded
   into a buffer, the buffer is decoded into a new structure, and the
   decoder's memory is transferred to nmem so that the copy is owned by that
   pool.
   NOTE(review): the return value of the decode call is not checked on the
   line shown here. */
751 static Z_RPNQuery *copy_RPNQuery(Z_RPNQuery *src, NMEM nmem)
754 ODR encode = odr_createmem(ODR_ENCODE);
755 ODR decode = odr_createmem(ODR_DECODE);
757 if (z_RPNQuery(encode, &src, 0, 0))
760 char *buf = odr_getbuf(encode, &len, 0);
764 odr_setbuf(decode, buf, len, 0);
765 z_RPNQuery(decode, &dst, 0, 0);
768 nmem_transfer(nmem, decode->mem);
/* Deep-copy a sort key spec list by round-tripping it through ODR: src is
   encoded, the resulting buffer is decoded into dst (0 if nothing is
   decoded), and the decoder's memory is transferred to nmem so that the copy
   is owned by that pool. */
774 static Z_SortKeySpecList *copy_SortKeySpecList(Z_SortKeySpecList *src, NMEM nmem)
776 Z_SortKeySpecList *dst = 0;
777 ODR encode = odr_createmem(ODR_ENCODE);
778 ODR decode = odr_createmem(ODR_DECODE);
780 if (z_SortKeySpecList(encode, &src, 0, 0))
783 char *buf = odr_getbuf(encode, &len, 0);
787 odr_setbuf(decode, buf, len, 0);
788 z_SortKeySpecList(decode, &dst, 0, 0);
791 nmem_transfer(nmem, decode->mem);
/* Create (or update) result set `setname` as a copy of the set `rset`.
   The new set gets its own nmem, duplicates of the database names, a
   duplicate of the source rset (rset_dup) and a deep copy of the source RPN
   query allocated in the new set's nmem. */
797 ZebraSet resultSetClone(ZebraHandle zh, const char *setname,
803 nset = resultSetAdd(zh, setname, 1);
807 nset->nmem = nmem_create();
809 nset->num_bases = rset->num_bases;
811 nmem_malloc(nset->nmem, nset->num_bases * sizeof(*rset->basenames));
812 for (i = 0; i<rset->num_bases; i++)
813 nset->basenames[i] = nmem_strdup(nset->nmem, rset->basenames[i]);
816 nset->rset = rset_dup(rset->rset);
818 nset->rpn = copy_RPNQuery(rset->rpn, nset->nmem);
/* Sort one input result set into output_setname.
   Exactly one input set is supported.  Zero inputs set
   YAZ_BIB1_NO_RESULT_SET_NAME_SUPPLIED_ON_SORT; more than one set
   YAZ_BIB1_SORT_TOO_MANY_INPUT_RESULTS.  A missing set or a set without an
   rset sets YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST.
   When the output name differs from the input name the input set is cloned
   first.  A deep copy of sort_sequence is stored on the set, and the actual
   sorting is delegated to resultSetSortSingle. */
822 ZEBRA_RES resultSetSort(ZebraHandle zh, NMEM nmem,
823 int num_input_setnames, const char **input_setnames,
824 const char *output_setname,
825 Z_SortKeySpecList *sort_sequence, int *sort_status)
830 if (num_input_setnames == 0)
832 zebra_setError(zh, YAZ_BIB1_NO_RESULT_SET_NAME_SUPPLIED_ON_SORT, 0);
835 if (num_input_setnames > 1)
837 zebra_setError(zh, YAZ_BIB1_SORT_TOO_MANY_INPUT_RESULTS, 0);
842 yaz_log(log_level_sort, "result set sort input=%s output=%s",
843 *input_setnames, output_setname);
844 sset = resultSetGet(zh, input_setnames[0]);
847 zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
851 if (!(rset = sset->rset))
853 zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
857 if (strcmp(output_setname, input_setnames[0]))
858 sset = resultSetClone(zh, output_setname, sset);
859 sset->sortSpec = copy_SortKeySpecList(sort_sequence, sset->nmem);
860 return resultSetSortSingle(zh, nmem, sset, rset, sort_sequence,
/* Sort the hits of rset into sset->sort_info according to sort_sequence.
   At most ZSET_SORT_MAX_LEVEL sort keys are used.  Each key spec is checked
   (missing-value action, sort relation, element kind, key kind) and resolved
   per database to a sort-index ord and a numeric flag.  Unsupported or
   unresolvable specs set a Bib-1 error on zh.
   The rset is then read; on each change of sysno resultSetInsertSort is
   called with the database number derived from the term (only when more than
   one database is in use).  The break handler, if installed, is polled while
   reading.  At the end *sort_status is set to Z_SortResponse_success. */
864 ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem,
865 ZebraSet sset, RSET rset,
866 Z_SortKeySpecList *sort_sequence,
875 struct sortKeyInfo sort_criteria[ZSET_SORT_MAX_LEVEL];
876 char *cmp_buf[ZSET_SORT_MAX_LEVEL];
877 char *tmp_cmp_buf[ZSET_SORT_MAX_LEVEL];
883 size_t sysno_mem_index = 0;
/* NOTE(review): the database list used here is the handle's
   (zh->num_basenames / zh->basenames), not the one stored in sset */
885 int numbases = zh->num_basenames;
886 yaz_log(log_level_sort, "searching %d databases",numbases);
888 if (zh->m_staticrank)
891 assert(nmem); /* compiler shut up about unused param */
892 sset->sort_info->num_entries = 0;
/* first call obtains the number of terms, second call fills the array */
894 rset_getterms(rset, 0, 0, &n);
895 terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
896 rset_getterms(rset, terms, n, &numTerms);
/* extra sort keys beyond ZSET_SORT_MAX_LEVEL are ignored */
899 num_criteria = sort_sequence->num_specs;
900 if (num_criteria > ZSET_SORT_MAX_LEVEL)
901 num_criteria = ZSET_SORT_MAX_LEVEL;
902 /* set up the search criteria */
903 for (i = 0; i < num_criteria; i++)
905 Z_SortKeySpec *sks = sort_sequence->specs[i];
909 sort_criteria[i].ord = (int *)
910 nmem_malloc(nmem, sizeof(int)*numbases);
911 sort_criteria[i].numerical = (int *)
912 nmem_malloc(nmem, sizeof(int)*numbases);
914 /* initialize ord and numerical for each database */
915 for (ib = 0; ib < numbases; ib++)
917 sort_criteria[i].ord[ib] = -1;
918 sort_criteria[i].numerical[ib] = 0;
921 if (sks->which == Z_SortKeySpec_missingValueData)
923 zebra_setError(zh, YAZ_BIB1_UNSUPP_MISSING_DATA_ACTION, 0);
926 if (*sks->sortRelation == Z_SortKeySpec_ascending)
927 sort_criteria[i].relation = 'A';
928 else if (*sks->sortRelation == Z_SortKeySpec_descending)
929 sort_criteria[i].relation = 'D';
932 zebra_setError(zh, YAZ_BIB1_ILLEGAL_SORT_RELATION, 0);
935 if (sks->sortElement->which == Z_SortElement_databaseSpecific)
937 zebra_setError(zh, YAZ_BIB1_DATABASE_SPECIFIC_SORT_UNSUPP, 0);
940 else if (sks->sortElement->which != Z_SortElement_generic)
942 zebra_setError(zh, YAZ_BIB1_SORT_ILLEGAL_SORT, 0);
945 sk = sks->sortElement->u.generic;
948 case Z_SortKey_sortField:
949 yaz_log(log_level_sort, "key %d is of type sortField", i+1);
950 for (ib = 0; ib < numbases; ib++)
952 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
953 sort_criteria[i].numerical[ib] = 0;
954 sort_criteria[i].ord[ib] =
955 zebraExplain_lookup_attr_str(zh->reg->zei,
956 zinfo_index_category_sort,
958 if (sks->which != Z_SortKeySpec_null
959 && sort_criteria[i].ord[ib] == -1)
962 YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
967 case Z_SortKey_elementSpec:
968 yaz_log(log_level_sort, "key %d is of type elementSpec", i+1);
969 zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
971 case Z_SortKey_sortAttributes:
972 yaz_log(log_level_sort, "key %d is of type sortAttributes", i+1);
973 /* for every database we searched, get the sort index file
974 id (ord) and its numerical indication and store them in
976 for (ib = 0; ib < numbases; ib++)
978 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
979 res = zebra_sort_get_ord(zh, sk->u.sortAttributes,
980 &sort_criteria[i].ord[ib],
981 &sort_criteria[i].numerical[ib]);
984 if (sks->which != Z_SortKeySpec_null && res != ZEBRA_OK)
988 /* right now we look up the index type based on the first database
989 if the index_type's can differ between the indexes of different
990 databases (which i guess they can?) then we have to store the
991 index types for each database, just like the ord and numerical */
992 if (zebraExplain_lookup_ord(zh->reg->zei, sort_criteria[i].ord[0],
993 &sort_criteria[i].index_type,
996 zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
1000 /* allocate space for each compare buf + one extra for tmp comparison */
1001 /* cmp_buf is an array of array, the first dimension is the criteria and the second dimension are
1002 all other result entries to compare against. This is slowly filled when records are processed.
1003 tmp_cmp_buf is an array with a value of the current record for each criteria
1005 for (i = 0; i<num_criteria; i++)
1007 cmp_buf[i] = xmalloc(sset->sort_info->max_entries
1008 * SORT_IDX_ENTRYSIZE);
1009 tmp_cmp_buf[i] = xmalloc(SORT_IDX_ENTRYSIZE);
1011 rfd = rset_open(rset, RSETF_READ);
1012 while (rset_read(rfd, &key, &termid))
1014 zint this_sys = key.mem[sysno_mem_index];
1015 if (log_level_searchhits)
1016 key_logdump_txt(log_level_searchhits, &key, termid->name);
1018 if (this_sys != psysno)
1020 int database_no = 0;
1021 if ((sset->hits & 255) == 0 && zh->break_handler_func)
1023 if (zh->break_handler_func(zh->break_handler_data))
1025 rset_set_hits_limit(rset, 0);
1032 /* determine database from the term, but only bother if more than
1033 one database is in use*/
1034 if (numbases > 1 && termid->ol)
1036 const char *this_db = 0;
1037 if (zebraExplain_lookup_ord(zh->reg->zei, termid->ol->ord, 0, &this_db, 0)
1040 for (ib = 0; ib < numbases; ib++)
1041 if (!strcmp(this_db, zh->basenames[ib]))
1046 yaz_log(YLOG_LOG, "sysno=" ZINT_FORMAT " database_no=%d", this_sys,
1048 ord_list_print(termid->ol);
1050 resultSetInsertSort(zh, sset, database_no,
1051 sort_criteria, num_criteria, psysno, cmp_buf,
1057 /* free the compare buffers */
1058 for (i = 0; i<num_criteria; i++)
1061 xfree(tmp_cmp_buf[i]);
1064 yaz_log(log_level_sort, ZINT_FORMAT " keys, " ZINT_FORMAT " sysnos, sort",
1066 for (i = 0; i < numTerms; i++)
1067 yaz_log(log_level_sort, "term=\"%s\" type=%s count=" ZINT_FORMAT,
1068 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
1069 *sort_status = Z_SortResponse_success;
/* Look up the result set named resultSetId via resultSetGet (which may
   re-run the search for it).
   NOTE(review): presumably yields that set's rset - confirm. */
1073 RSET resultSetRef(ZebraHandle zh, const char *resultSetId)
1077 if ((s = resultSetGet(zh, resultSetId)))
/* Rank the hits of rset into zebraSet->sort_info.
   The rank handler is named by the "rank" resource (default "rank-1").  If
   it cannot be found, a warning is logged and YAZ_BIB1_UNSUPP_SEARCH is set.
   Otherwise the handler's begin callback is invoked and keys are read from
   rset.  Each key is fed to the add callback.  When the sysno changes, the
   calc callback scores the previous record and the score is inserted with
   resultSetInsertRank.  The last record is scored after the loop, then the
   end callback runs.
   With static rank enabled the sysno is taken from key.mem[1] and the static
   rank from key.mem[0].  The break handler, if installed, is polled while
   reading.  zebraSet->hits is taken from rset->hits_count. */
1082 ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet,
1083 RSET rset, NMEM nmem)
1092 ZebraRankClass rank_class;
1093 struct zset_sort_info *sort_info;
1094 const char *rank_handler_name = res_get_def(zh->res, "rank", "rank-1");
1095 size_t sysno_mem_index = 0;
1097 if (zh->m_staticrank)
1098 sysno_mem_index = 1;
1102 sort_info = zebraSet->sort_info;
1103 sort_info->num_entries = 0;
1105 zebraSet->estimated_hit_count = 0;
/* first call obtains the number of terms, second call fills the array */
1106 rset_getterms(rset, 0, 0, &n);
1107 terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
1108 rset_getterms(rset, terms, n, &numTerms);
1110 rank_class = zebraRankLookup(zh, rank_handler_name);
1113 yaz_log(YLOG_WARN, "No such rank handler: %s", rank_handler_name);
1114 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "Cannot find rank handler");
1119 RSFD rfd = rset_open(rset, RSETF_READ);
1120 struct rank_control *rc = rank_class->control;
1123 void *handle = (*rc->begin) (zh->reg, rank_class->class_handle, rset,
1124 nmem, terms, numTerms);
1125 zint psysno = 0; /* previous doc id / sys no */
1126 zint pstaticrank = 0; /* previous static rank */
1128 while (rset_read(rfd, &key, &termid))
1130 zint this_sys = key.mem[sysno_mem_index];
1132 zint seqno = key.mem[key.len-1];
1134 if (log_level_searchhits)
1135 key_logdump_txt(log_level_searchhits, &key, termid->name);
1136 if (this_sys != psysno)
1137 { /* new record .. */
1138 if (!(rfd->counted_items & 255) && zh->break_handler_func)
1140 if (zh->break_handler_func(zh->break_handler_data))
1142 yaz_log(YLOG_LOG, "Aborted search");
1146 if (rfd->counted_items > rset->hits_limit)
1149 { /* only if we did have a previous record */
1150 score = (*rc->calc)(handle, psysno, pstaticrank,
1152 /* insert the hit. A=Ascending */
1153 resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1158 zebraSet->estimated_hit_count = 1;
1159 rset_set_hits_limit(rset, 0);
1163 if (zh->m_staticrank)
1164 pstaticrank = key.mem[0];
1166 (*rc->add)(handle, CAST_ZINT_TO_INT(seqno), termid);
1170 { /* we had - at least - one record */
1171 score = (*rc->calc)(handle, psysno, pstaticrank, &stop_flag);
1172 /* insert the hit. A=Ascending */
1173 resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1176 (*rc->end)(zh->reg, handle);
1179 zebraSet->hits = rset->hits_count;
1181 yaz_log(log_level_searchterms, ZINT_FORMAT " keys, "
1182 ZINT_FORMAT " sysnos, rank", kno, zebraSet->hits);
1183 for (i = 0; i < numTerms; i++)
1185 yaz_log(log_level_searchterms, "term=\"%s\" type=%s count="
1187 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
/* Find the rank class called `name` in zh->reg->rank_classes.
   The first time a class is found (init_flag clear) its create callback, if
   it has one, is invoked to obtain class_handle. */
1192 ZebraRankClass zebraRankLookup(ZebraHandle zh, const char *name)
1194 ZebraRankClass p = zh->reg->rank_classes;
1195 while (p && strcmp(p->control->name, name))
1197 if (p && !p->init_flag)
1199 if (p->control->create)
1200 p->class_handle = (*p->control->create)(zh);
/* Register a rank handler with a register.
   A new rank class is allocated holding a private copy of *ctrl (including a
   duplicated name string) and is pushed onto the front of
   reg->rank_classes. */
1206 void zebraRankInstall(struct zebra_register *reg, struct rank_control *ctrl)
1208 ZebraRankClass p = (ZebraRankClass) xmalloc(sizeof(*p));
1209 p->control = (struct rank_control *) xmalloc(sizeof(*p->control));
1210 memcpy(p->control, ctrl, sizeof(*p->control));
1211 p->control->name = xstrdup(ctrl->name);
1213 p->next = reg->rank_classes;
1214 reg->rank_classes = p;
/* Tear down all rank classes of a register.
   For each class that was initialised and has a destroy callback, the
   callback is invoked with the class handle; the duplicated control name is
   freed.  Finally the list head is reset to NULL. */
1217 void zebraRankDestroy(struct zebra_register *reg)
1219 ZebraRankClass p = reg->rank_classes;
1222 ZebraRankClass p_next = p->next;
1223 if (p->init_flag && p->control->destroy)
1224 (*p->control->destroy)(reg, p->class_handle);
1225 xfree(p->control->name);
1230 reg->rank_classes = NULL;
/* Recursively walk an rset tree and collect per-term information.
   Children are visited first, each writing into the output arrays at the
   running offset `no`.  The rset's own term, hits_count and hits_approx are
   stored at index `no` of the respective arrays.
   Any of the three arrays may be NULL (0), in which case the offset pointer
   passed down is 0 as well; calling with all three NULL is used elsewhere
   in this file to obtain just the number of terms. */
1233 static int trav_rset_for_termids(RSET rset, TERMID *termid_array,
1234 zint *hits_array, int *approx_array)
1238 for (i = 0; i<rset->no_children; i++)
1239 no += trav_rset_for_termids(rset->children[i],
1240 (termid_array ? termid_array + no : 0),
1241 (hits_array ? hits_array + no : 0),
1242 (approx_array ? approx_array + no : 0));
1246 termid_array[no] = rset->term;
1248 hits_array[no] = rset->hits_count;
1250 approx_array[no] = rset->hits_approx;
1252 yaz_log(YLOG_LOG, "rset=%p term=%s limit=" ZINT_FORMAT
1253 " count=" ZINT_FORMAT,
1254 rset, rset->term->name, rset->hits_limit, rset->hits_count);
/* Store in *num_terms the number of terms of result set `setname`, obtained
   by traversing the set's rset with no output arrays. */
1261 ZEBRA_RES zebra_result_set_term_no(ZebraHandle zh, const char *setname,
1264 ZebraSet sset = resultSetGet(zh, setname);
1268 *num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
/* Report information about term number `no` (0-based, must be below the
   number of terms) of result set `setname`.
   Outputs: *count receives the term's hit count, *approx its approximation
   flag, *term_ref_id its reference id.  The term text is written to termbuf
   and NUL-terminated, and *termlen is updated to the number of bytes
   written.
   On entry *termlen is the size of termbuf; one byte of it is reserved for
   the terminator.  If zh->iconv_from_utf8 is set the text is converted with
   yaz_iconv; otherwise it is copied with memcpy.
   NOTE(review): when the term is longer than the buffer the copy is
   presumably truncated to fit - confirm. */
1274 ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname,
1275 int no, zint *count, int *approx,
1276 char *termbuf, size_t *termlen,
1277 const char **term_ref_id)
1279 ZebraSet sset = resultSetGet(zh, setname);
1282 int num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1283 if (no >= 0 && no < num_terms)
1285 TERMID *term_array = xmalloc(num_terms * sizeof(*term_array));
1286 zint *hits_array = xmalloc(num_terms * sizeof(*hits_array));
1287 int *approx_array = xmalloc(num_terms * sizeof(*approx_array));
1289 trav_rset_for_termids(sset->rset, term_array,
1290 hits_array, approx_array);
1293 *count = hits_array[no];
1295 *approx = approx_array[no];
1298 char *inbuf = term_array[no]->name;
1299 size_t inleft = strlen(inbuf);
1300 size_t outleft = *termlen - 1;
1302 if (zh->iconv_from_utf8 != 0)
1304 char *outbuf = termbuf;
1307 ret = yaz_iconv(zh->iconv_from_utf8, &inbuf, &inleft,
1309 if (ret == (size_t)(-1))
1313 yaz_iconv(zh->iconv_from_utf8, 0, 0,
1315 *termlen = outbuf - termbuf;
1320 if (inleft > outleft)
1323 memcpy(termbuf, inbuf, *termlen);
1325 termbuf[*termlen] = '\0';
1328 *term_ref_id = term_array[no]->ref_id;
1332 xfree(approx_array);
/* Collect snippet entries for one record of result set `setname`.
   A temporary rset is created (in the "setTmpDir" directory resource) and a
   key is written to it.
   NOTE(review): the key presumably identifies `sysno` - confirm.
   The temporary rset is combined with a duplicate of the set's rset using an
   AND rset.  Every key read from the combination is appended to `snippets`
   once per ord in the term's ord list, using the last key component as the
   sequence number. */
1339 ZEBRA_RES zebra_snippets_hit_vector(ZebraHandle zh, const char *setname,
1340 zint sysno, zebra_snippets *snippets)
1342 ZebraSet sset = resultSetGet(zh, setname);
1343 yaz_log(YLOG_DEBUG, "zebra_get_hit_vector setname=%s zysno=" ZINT_FORMAT,
1349 struct rset_key_control *kc = zebra_key_control_create(zh);
1350 NMEM nmem = nmem_create();
1352 RSET rsets[2], rset_comb;
1353 RSET rset_temp = rset_create_temp(nmem, kc, kc->scope,
1354 res_get(zh->res, "setTmpDir"),0 );
1357 RSFD rsfd = rset_open(rset_temp, RSETF_WRITE);
1364 rset_write(rsfd, &key);
1367 rsets[0] = rset_temp;
1368 rsets[1] = rset_dup(sset->rset);
1370 rset_comb = rset_create_and(nmem, kc, kc->scope, 2, rsets);
1372 rsfd = rset_open(rset_comb, RSETF_READ);
1374 while (rset_read(rsfd, &key, &termid))
1378 struct ord_list *ol;
1379 for (ol = termid->ol; ol; ol = ol->next)
1381 zebra_snippets_append(snippets, key.mem[key.len-1], 0,
1382 ol->ord, termid->name);
1388 rset_delete(rset_comb);
/* Map a record id to the system numbers stored for it.
   On entry *no_sysnos is the capacity of sysnos[]; on return it holds the
   number of entries written.
   Without an isamb register or without segment indexing a short path is
   taken.
   NOTE(review): that path presumably stores recid itself as the only sysno -
   confirm.
   Otherwise, for each database, the "_ALLRECORDS" index (type "w", category
   alwaysmatches) is looked up in the dictionary.  Its ISAM position is
   opened, the reader is forwarded to the first key for recid, and the last
   component of every key whose first component equals recid is collected
   while capacity remains. */
1395 static ZEBRA_RES zebra_recid_to_sysno(ZebraHandle zh,
1396 const char **basenames, int num_bases,
1398 zint *sysnos, int *no_sysnos)
1400 ZEBRA_RES res = ZEBRA_OK;
1401 int sysnos_offset = 0;
1404 if (!zh->reg->isamb || !zh->m_segment_indexing)
1406 if (sysnos_offset < *no_sysnos)
1412 for (i = 0; res == ZEBRA_OK && i < num_bases; i++)
1414 const char *database = basenames[i];
1415 if (zebraExplain_curDatabase(zh->reg->zei, database) == 0)
1417 const char *index_type = "w";
1418 const char *use_string = "_ALLRECORDS";
1420 zinfo_index_category_t cat = zinfo_index_category_alwaysmatches;
1421 ord = zebraExplain_lookup_attr_str(zh->reg->zei, cat,
1422 index_type, use_string);
1426 int ord_len = key_SU_encode(ord, ord_buf);
1429 ord_buf[ord_len] = '\0';
1431 info = dict_lookup(zh->reg->dict, ord_buf);
/* the first byte of the dictionary info is expected to equal sizeof(ISAM_P) */
1434 if (*info != sizeof(ISAM_P))
1442 struct it_key key_until, key_found;
1446 memcpy(&isam_p, info+1, sizeof(ISAM_P));
1448 pt = isamb_pp_open(zh->reg->isamb, isam_p, 2);
1453 key_until.mem[i++] = recid;
1454 key_until.mem[i++] = 0; /* section_id */
1455 if (zh->m_segment_indexing)
1456 key_until.mem[i++] = 0; /* segment */
1457 key_until.mem[i++] = 0;
1460 r = isamb_pp_forward(pt, &key_found, &key_until);
1461 while (r && key_found.mem[0] == recid)
1463 if (sysnos_offset < *no_sysnos)
1464 sysnos[sysnos_offset++] =
1465 key_found.mem[key_found.len-1];
1466 r = isamb_pp_read(pt, &key_found);
1476 *no_sysnos = sysnos_offset;
/* Map a record id to system numbers using the databases of result set
   `setname`.  The set's database names are fetched with
   resultSetGetBaseNames and the work is delegated to zebra_recid_to_sysno.
   On entry *no_sysnos is the capacity of sysnos[]; on return it holds the
   number of entries written. */
1480 ZEBRA_RES zebra_result_recid_to_sysno(ZebraHandle zh,
1481 const char *setname,
1483 zint *sysnos, int *no_sysnos)
1485 const char **basenames;
1489 res = resultSetGetBaseNames(zh, setname, &basenames, &num_bases);
1490 if (res != ZEBRA_OK)
1493 return zebra_recid_to_sysno(zh, basenames, num_bases,
1494 recid, sysnos, no_sysnos);
/* Count the hits of an rset by reading it.
   The rset's hits_limit is set to approx_limit before reading.  Keys are
   read without term information, and a change in key.mem[0] marks a new
   record.  *count is taken from rset->hits_count.
   NOTE(review): reading presumably stops once the reader's counted_items
   reaches hits_limit - confirm. */
1497 void zebra_count_set(ZebraHandle zh, RSET rset, zint *count,
1504 yaz_log(YLOG_DEBUG, "count_set");
1506 rset->hits_limit = approx_limit;
1509 rfd = rset_open(rset, RSETF_READ);
1510 while (rset_read(rfd, &key,0 /* never mind terms */))
1512 if (key.mem[0] != psysno)
1514 psysno = key.mem[0];
1515 if (rfd->counted_items >= rset->hits_limit)
1520 *count = rset->hits_count;
1527 * indent-tabs-mode: nil
1529 * vim: shiftwidth=4 tabstop=8 expandtab