1 /* This file is part of the Zebra server.
2 Copyright (C) 1995-2008 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
31 #include <yaz/diagbib1.h>
// Maximum number of sort criteria handled per sort request; it sizes the
// per-criterion arrays in resultSetSortSingle and caps num_criteria there.
34 #define ZSET_SORT_MAX_LEVEL 10
// One term entry recorded for a result set; resultSetAddTerm stores
// reg_type, db, index_name and term in it.
36 struct zebra_set_term_entry {
// Fields of the result set record that the functions below reach through
// a ZebraSet handle.
47 NMEM rset_nmem; /* for creating the rsets in */
/* names of the databases searched; num_bases entries (see resultSetAddRPN) */
50 const char **basenames;
/* sort specification copied in by resultSetSort; resultSetGet uses it to
   re-sort after a re-search */
52 Z_SortKeySpecList *sortSpec;
/* sorted/ranked window of entries for this set */
53 struct zset_sort_info *sort_info;
/* term entries filled by resultSetAddTerm (allocated on first use) */
54 struct zebra_set_term_entry *term_entries;
/* next set in the zh->sets list */
56 struct zebra_set *next;
/* set to 1 by resultSetRank; resultSetAddRPN reports it to its caller */
58 int estimated_hit_count;
60 zint cache_position; /* last position */
61 RSFD cache_rfd; /* rfd (NULL if not existing) */
62 zint cache_psysno; /* sysno for last position */
63 zint approx_limit; /* limit before we do approx */
/* Entry of the sorted window; the code in this file reads and writes its
   sysno and score members. */
66 struct zset_sort_entry {
/* Sort bookkeeping for one result set. */
71 struct zset_sort_info {
/* backing storage for the entries (max_entries elements, see resultSetAdd) */
74 struct zset_sort_entry *all_entries;
/* ordered array of pointers into all_entries */
75 struct zset_sort_entry **entries;
/* Log levels used by this file.  All start at 0; loglevels() assigns the
   module-specific ones from yaz_log_module_level(). */
78 static int log_level_set=0;
79 static int log_level_sort=0;
80 static int log_level_searchhits=0;
81 static int log_level_searchterms=0;
82 static int log_level_resultsets=0;
/* Resolve the log levels of the "sorting", "searchhits", "searchterms" and
   "resultsets" log modules and cache them in the file-level statics. */
84 static void loglevels(void)
88 log_level_sort = yaz_log_module_level("sorting");
89 log_level_searchhits = yaz_log_module_level("searchhits");
90 log_level_searchterms = yaz_log_module_level("searchterms");
91 log_level_resultsets = yaz_log_module_level("resultsets");
/* Run the RPN query for sset, then rank (resultSetRank) or sort
   (resultSetSortSingle) the resulting RSET.
   nmem backs the temporary sort sequence; rset_nmem is passed on to
   resultSetRank.  Returns a ZEBRA_RES status, initialised to ZEBRA_OK. */
96 static ZEBRA_RES resultSetSearch(ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
97 Z_RPNQuery *rpn, ZebraSet sset)
100 Z_SortKeySpecList *sort_sequence;
102 ZEBRA_RES res = ZEBRA_OK;
/* Pre-allocate a fixed number of sort spec slots, all cleared to 0;
   presumably rpn_search_top fills in the ones the query requests. */
104 sort_sequence = (Z_SortKeySpecList *)
105 nmem_malloc(nmem, sizeof(*sort_sequence));
106 sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
107 sort_sequence->specs = (Z_SortKeySpec **)
108 nmem_malloc(nmem, sort_sequence->num_specs *
109 sizeof(*sort_sequence->specs));
110 for (i = 0; i<sort_sequence->num_specs; i++)
111 sort_sequence->specs[i] = 0;
/* Fetch the approximation limit for this query into the set. */
113 rpn_get_top_approx_limit(zh, rpn->RPNStructure, &sset->approx_limit);
115 res = rpn_search_top(zh, rpn->RPNStructure, rpn->attributeSetId,
118 sset->num_bases, sset->basenames,
/* Count the leading non-null specs and shrink num_specs to that count.
   NOTE(review): the loop has no upper bound of its own; if all 10 slots
   are non-null it reads specs[10].  Confirm this cannot happen. */
125 for (i = 0; sort_sequence->specs[i]; i++)
127 sort_sequence->num_specs = i;
128 rset->hits_limit = sset->approx_limit;
131 res = resultSetRank(zh, sset, rset, rset_nmem);
135 res = resultSetSortSingle(zh, nmem, sset, rset,
136 sort_sequence, &sort_status);
/* Create (or overwrite) the result set and run the RPN query against the
   given databases.
   The set is marked locked while the search runs and unlocked afterwards.
   *hits receives the hit count of the set; *estimated_hit_count is cleared
   first and set to 1 when the set flags its count as estimated. */
143 ZEBRA_RES resultSetAddRPN(ZebraHandle zh, NMEM m, Z_RPNQuery *rpn,
144 int num_bases, char **basenames,
146 zint *hits, int *estimated_hit_count)
153 *estimated_hit_count = 0;
/* third argument 1: allow overwriting an existing set of that name */
155 zebraSet = resultSetAdd(zh, setname, 1);
158 zebraSet->locked = 1;
161 zebraSet->rset_nmem = nmem_create();
/* Keep private copies of the database names in the set's own nmem. */
163 zebraSet->num_bases = num_bases;
164 zebraSet->basenames =
165 nmem_malloc(zebraSet->nmem, num_bases * sizeof(*zebraSet->basenames));
166 for (i = 0; i<num_bases; i++)
167 zebraSet->basenames[i] = nmem_strdup(zebraSet->nmem, basenames[i]);
169 res = resultSetSearch(zh, zebraSet->nmem, zebraSet->rset_nmem,
171 *hits = zebraSet->hits;
172 if (zebraSet->estimated_hit_count)
173 *estimated_hit_count = 1;
177 zebraSet->locked = 0;
/* Record one term (register type, database, index name, term string) in
   the term entry table of result set s.
   The table is allocated on first use with room for 1000 entries; strings
   are duplicated into s->nmem.  s->hits is used as the slot index, and
   the entry is stored only while that index is below term_entries_max. */
183 void resultSetAddTerm(ZebraHandle zh, ZebraSet s, int reg_type,
184 const char *db, const char *index_name,
187 assert(zh); /* compiler shut up */
189 s->nmem = nmem_create();
190 if (!s->term_entries)
193 s->term_entries_max = 1000;
195 nmem_malloc(s->nmem, s->term_entries_max *
196 sizeof(*s->term_entries));
/* a null term marks an unused slot */
197 for (i = 0; i < s->term_entries_max; i++)
198 s->term_entries[i].term = 0;
200 if (s->hits < s->term_entries_max)
202 s->term_entries[s->hits].reg_type = reg_type;
203 s->term_entries[s->hits].db = nmem_strdup(s->nmem, db);
204 s->term_entries[s->hits].index_name = nmem_strdup(s->nmem, index_name);
205 s->term_entries[s->hits].term = nmem_strdup(s->nmem, term);
/* Find the result set called name in zh->sets, or create it.
   For an existing set: when ov is 0 or the set is locked it is not reset;
   otherwise its cached read handle, rset and nmem pools are released so
   the set can be reused.
   For a new set: the record and its sort window are allocated.  The window
   size comes from the "sortmax" resource (default "1000"), minimum 2. */
210 ZebraSet resultSetAdd(ZebraHandle zh, const char *name, int ov)
215 for (s = zh->sets; s; s = s->next)
216 if (!strcmp(s->name, name))
223 yaz_log(log_level_resultsets, "updating result set %s", name);
224 if (!ov || s->locked)
229 rset_close(s->cache_rfd);
230 rset_delete(s->rset);
233 nmem_destroy(s->rset_nmem);
235 nmem_destroy(s->nmem);
239 const char *sort_max_str = zebra_get_resource(zh, "sortmax", "1000");
241 yaz_log(log_level_resultsets, "adding result set %s", name);
242 s = (ZebraSet) xmalloc(sizeof(*s));
245 s->name = xstrdup(name);
247 s->sort_info = (struct zset_sort_info *)
248 xmalloc(sizeof(*s->sort_info));
249 s->sort_info->max_entries = atoi(sort_max_str);
250 if (s->sort_info->max_entries < 2)
251 s->sort_info->max_entries = 2;
/* entries[] is the ordered view; all_entries[] is the storage it points
   into.  Initially entries[i] refers to all_entries[i]. */
253 s->sort_info->entries = (struct zset_sort_entry **)
254 xmalloc(sizeof(*s->sort_info->entries) *
255 s->sort_info->max_entries);
256 s->sort_info->all_entries = (struct zset_sort_entry *)
257 xmalloc(sizeof(*s->sort_info->all_entries) *
258 s->sort_info->max_entries);
259 for (i = 0; i < s->sort_info->max_entries; i++)
260 s->sort_info->entries[i] = s->sort_info->all_entries + i;
270 s->cache_position = 0;
/* inherit the handle-wide approximation limit */
272 s->approx_limit = zh->approx_limit;
273 s->estimated_hit_count = 0;
/* Look up the result set called name in zh->sets.
   If the set has no term entries and no rset but still has its RPN query
   (the state resultSetInvalidate appears to leave it in), the query is
   run again, and the stored sort specification, if any, is re-applied. */
277 ZebraSet resultSetGet(ZebraHandle zh, const char *name)
281 for (s = zh->sets; s; s = s->next)
282 if (!strcmp(s->name, name))
284 if (!s->term_entries && !s->rset && s->rpn)
/* scratch pool for the re-search and re-sort */
286 NMEM nmem = nmem_create();
287 yaz_log(log_level_resultsets, "research %s", name);
289 s->rset_nmem = nmem_create();
290 resultSetSearch(zh, nmem, s->rset_nmem, s->rpn, s);
291 if (s->rset && s->sortSpec)
294 yaz_log(log_level_resultsets, "resort %s", name);
295 resultSetSortSingle(zh, nmem, s, s->rset, s->sortSpec,
/* Return, through *basenames and *num_bases, the database names stored in
   the named result set.  The array is the set's own; it is not copied. */
305 ZEBRA_RES resultSetGetBaseNames(ZebraHandle zh, const char *setname,
306 const char ***basenames, int *num_bases)
308 ZebraSet sset = resultSetGet(zh, setname);
311 *basenames = sset->basenames;
312 *num_bases = sset->num_bases;
/* Walk all result sets of the handle and release their search state: the
   cached read handle, the rset and the rset nmem pool.  The cache position
   is reset to 0.  The set records themselves stay in the list. */
317 void resultSetInvalidate(ZebraHandle zh)
319 ZebraSet s = zh->sets;
321 yaz_log(log_level_resultsets, "invalidating result sets");
322 for (; s; s = s->next)
327 rset_close(s->cache_rfd);
328 rset_delete(s->rset);
332 s->cache_position = 0;
334 nmem_destroy(s->rset_nmem);
/* Delete result sets by name.
   names/num give the sets to delete and statuses receives one status per
   name: every slot is first set to Z_DeleteStatus_resultSetDidNotExist and
   becomes Z_DeleteStatus_success when a set with that name is found.
   Matching sets have their sort window, nmem pools, cached read handle and
   rset released. */
339 void resultSetDestroy(ZebraHandle zh, int num, char **names,int *statuses)
/* pointer to the link being examined, starting at the list head */
341 ZebraSet * ss = &zh->sets;
345 for (i = 0; i<num; i++)
346 statuses[i] = Z_DeleteStatus_resultSetDidNotExist;
353 for (i = 0; i<num; i++)
354 if (!strcmp(s->name, names[i]))
357 statuses[i] = Z_DeleteStatus_success;
366 xfree(s->sort_info->all_entries);
367 xfree(s->sort_info->entries);
371 nmem_destroy(s->nmem);
375 rset_close(s->cache_rfd);
376 rset_delete(s->rset);
379 nmem_destroy(s->rset_nmem);
/* Build a positions array of num entries and fetch the corresponding meta
   records with zebra_meta_records_create.
   Requests with num <= 0 or num > 10000 are treated specially by the
   check below.  pos starts out pointing at the local pos_small buffer and
   is replaced by a heap allocation where the xmalloc call applies. */
388 ZebraMetaRecord *zebra_meta_records_create_range(ZebraHandle zh,
393 zint *pos = pos_small;
397 if (num > 10000 || num <= 0)
401 pos = xmalloc(sizeof(*pos) * num);
403 for (i = 0; i<num; i++)
406 mr = zebra_meta_records_create(zh, name, num, pos);
/* Build an array of num meta records for the given 1-based positions in
   the named result set.
   For a set with term entries, records are filled from the term table.
   Otherwise positions inside the sorted window are answered from
   sort_info; remaining positions are found by reading the rset, skipping
   records already present in the sorted window.  The read handle and the
   position reached are cached in the set for the next call. */
413 ZebraMetaRecord *zebra_meta_records_create(ZebraHandle zh, const char *name,
414 int num, zint *positions)
417 ZebraMetaRecord *sr = 0;
420 struct zset_sort_info *sort_info;
/* index into key.mem where the sysno lives */
421 size_t sysno_mem_index = 0;
423 if (zh->m_staticrank)
428 if (!(sset = resultSetGet(zh, name)))
430 if (!(rset = sset->rset))
432 if (!sset->term_entries)
434 sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
435 for (i = 0; i<num; i++)
/* NOTE(review): only the upper bound is checked here; confirm that
   positions[i] is always >= 1 before it is used as positions[i]-1. */
442 if (positions[i] <= sset->term_entries_max)
444 sr[i].term = sset->term_entries[positions[i]-1].term;
445 sr[i].db = sset->term_entries[positions[i]-1].db;
451 sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
452 for (i = 0; i<num; i++)
459 sort_info = sset->sort_info;
/* First pass: answer every position that lies inside the sorted window. */
464 for (i = 0; i<num; i++)
466 position = positions[i];
467 if (position > 0 && position <= sort_info->num_entries)
469 yaz_log(log_level_sort, "got pos=" ZINT_FORMAT
470 " (sorted)", position);
471 sr[i].sysno = sort_info->entries[position-1]->sysno;
472 sr[i].score = sort_info->entries[position-1]->score;
476 /* did we really get all entries using sort ? */
477 for (i = 0; i<num; i++)
482 if (i < num) /* nope, get the rest, unsorted - sorry */
/* Positions up to num_entries were handled above; skip past them. */
491 position = sort_info->num_entries;
492 while (num_i < num && positions[num_i] <= position)
/* Resume from the cached read handle when the next wanted position lies
   beyond the cached one. */
495 if (sset->cache_rfd &&
496 num_i < num && positions[num_i] > sset->cache_position)
498 position = sset->cache_position;
499 rfd = sset->cache_rfd;
500 psysno = sset->cache_psysno;
505 rset_close(sset->cache_rfd);
506 rfd = rset_open(rset, RSETF_READ);
508 while (num_i < num && rset_read(rfd, &key, 0))
510 zint this_sys = key.mem[sysno_mem_index];
/* a change of sysno marks the start of a new record */
511 if (this_sys != psysno)
516 /* determine if we already have this in our set */
517 for (i = sort_info->num_entries; --i >= 0; )
518 if (psysno == sort_info->entries[i]->sysno)
525 if (position == positions[num_i])
527 sr[num_i].sysno = psysno;
528 yaz_log(log_level_sort, "got pos=" ZINT_FORMAT " (unsorted)", position);
/* unsorted hits carry no score */
529 sr[num_i].score = -1;
/* remember where we stopped so a later call can continue from here */
534 sset->cache_position = position;
535 sset->cache_psysno = psysno;
536 sset->cache_rfd = rfd;
/* Release a records array obtained from zebra_meta_records_create.
   zh is unused apart from the assertion. */
542 void zebra_meta_records_destroy(ZebraHandle zh, ZebraMetaRecord *records,
545 assert(zh); /* compiler shut up about unused arg */
/* Per-criterion sort information (members of struct sortKeyInfo). */
551 int *ord; // array of ord for each database searched
552 int *numerical;// array of numerical flags for each database searched
/* index type used to untranslate sort terms; looked up from the ord of the
   first database in resultSetSortSingle */
553 const char *index_type;
/* Insert record sysno into the sorted window of sset according to the
   given sort criteria.
   cmp_buf[k] holds one SORT_IDX_ENTRYSIZE slot per window position for
   criterion k; tmp_cmp_buf[k] receives the sort key of the current record.
   *cached_success_db is the database index whose sort index satisfied the
   most recent read; it is tried first and updated on success. */
556 void resultSetInsertSort(ZebraHandle zh, ZebraSet sset,
557 struct sortKeyInfo *criteria, int num_criteria,
559 char *cmp_buf[], char *tmp_cmp_buf[], int *cached_success_db)
561 struct zset_sort_entry *new_entry = NULL;
562 struct zset_sort_info *sort_info = sset->sort_info;
564 int scan_db,scan_count;
565 int numbases = zh->num_basenames;
/* position the sort index on this record */
567 zebra_sort_sysno(zh->reg->sort_index, sysno);
/* Step 1: read the sort key of this record for every criterion. */
568 for (i = 0; i<num_criteria; i++)
570 char *this_entry_buf = tmp_cmp_buf[i];
571 memset(this_entry_buf, '\0', SORT_IDX_ENTRYSIZE);
573 // if the first database doesn't have a sort index, we assume none of them will
574 if (criteria[i].ord[0] != -1)
576 // now make a best guess for the database in which we think the record is located
577 // if it is not in our best guess, try the other databases one by one, till we had them all
578 scan_db=*cached_success_db;
583 if(scan_count>numbases){
584 // well...we scanned all databases and still nothing...give up
585 yaz_log(log_level_sort, "zebra_sort_read failed (record not found in indices)");
589 // the criteria[i].ord is the file id of the sort index
590 yaz_log(log_level_sort, "pre zebra_sort_type ord is %d", criteria[i].ord[scan_db]);
591 zebra_sort_type(zh->reg->sort_index, criteria[i].ord[scan_db]);
592 if(zebra_sort_read(zh->reg->sort_index, this_entry_buf)){
593 // all right, found it
594 // cache this db so we start trying from this db for next record
595 *cached_success_db=scan_db;
598 yaz_log(log_level_sort, "record not found in database, trying next one");
600 if(scan_db>=numbases){
607 yaz_log(log_level_sort, "criteria[i].ord is -1 so not reading from sort index");
/* Step 2: find the insert position, starting from the end of the window. */
610 i = sort_info->num_entries;
614 for (j = 0; j<num_criteria; j++)
616 char *this_entry_buf = tmp_cmp_buf[j];
617 char *other_entry_buf =
618 cmp_buf[j] + i * SORT_IDX_ENTRYSIZE;
/* numeric criteria compare the untranslated terms as numbers */
619 if (criteria[j].numerical[*cached_success_db])
621 char this_entry_org[1024];
622 char other_entry_org[1024];
624 // when searching multiple databases, we use the index type of
625 // the first one. So if they differ between databases, we have a problem here
626 // we could store the index_type for each database, but if we didn't find the
627 // record in any sort index, then we still don't know to which database it belongs.
628 const char *index_type = criteria[j].index_type;
629 zebra_term_untrans(zh, index_type, this_entry_org,
631 zebra_term_untrans(zh, index_type, other_entry_org,
633 diff = atof(this_entry_org) - atof(other_entry_org);
/* other criteria compare the raw sort keys bytewise */
644 rel = memcmp(this_entry_buf, other_entry_buf,
647 // when the compare is equal, continue to next criteria, else break out
/* 'A' = ascending, 'D' = descending (set in resultSetSortSingle) */
653 if (criteria[j].relation == 'A')
658 else if (criteria[j].relation == 'D')
665 yaz_log(log_level_sort, "ok, we want to insert record at position %d",i);
/* Step 3: take a free entry, or reuse one when the window is full. */
666 j = sort_info->max_entries;
668 yaz_log(log_level_sort, "sort_info->max_entries reached (%d) abort sort",j);
672 if (sort_info->num_entries == j)
675 j = (sort_info->num_entries)++;
676 new_entry = sort_info->entries[j];
677 // move up all higher entries (to make room)
681 for (k = 0; k<num_criteria; k++)
683 char *j_buf = cmp_buf[k] + j * SORT_IDX_ENTRYSIZE;
684 char *j_1_buf = cmp_buf[k] + (j-1) * SORT_IDX_ENTRYSIZE;
685 memcpy(j_buf, j_1_buf, SORT_IDX_ENTRYSIZE);
687 sort_info->entries[j] = sort_info->entries[j-1];
690 // and insert the new entry at the correct place
691 sort_info->entries[i] = new_entry;
693 // and add this to the compare buffer
694 for (i = 0; i<num_criteria; i++)
696 char *new_entry_buf = cmp_buf[i] + j * SORT_IDX_ENTRYSIZE;
697 char *this_entry_buf = tmp_cmp_buf[i];
698 memcpy(new_entry_buf, this_entry_buf, SORT_IDX_ENTRYSIZE);
700 new_entry->sysno = sysno;
/* sorted (not ranked) entries carry no score */
701 new_entry->score = -1;
/* Insert a ranked hit (sysno, score) into the sorted window sort_info.
   relation selects the ordering; the visible branch tests it against 'A'
   (the value resultSetRank passes).  zh is unused apart from the
   assertion. */
704 void resultSetInsertRank(ZebraHandle zh, struct zset_sort_info *sort_info,
705 zint sysno, int score, int relation)
707 struct zset_sort_entry *new_entry = NULL;
709 assert(zh); /* compiler shut up about unused arg */
/* find the insert position, starting from the end of the window */
711 i = sort_info->num_entries;
716 rel = score - sort_info->entries[i]->score;
723 else if (relation == 'A')
/* take a free entry, or reuse one when the window is full */
730 j = sort_info->max_entries;
734 if (sort_info->num_entries == j)
737 j = (sort_info->num_entries)++;
739 new_entry = sort_info->entries[j];
/* shift higher entries up to make room */
742 sort_info->entries[j] = sort_info->entries[j-1];
745 sort_info->entries[i] = new_entry;
747 new_entry->sysno = sysno;
748 new_entry->score = score;
/* Deep-copy an RPN query by encoding it with one ODR stream and decoding
   the buffer with another.  The decoder's memory is then transferred into
   nmem, so the copy lives as long as that pool. */
751 static Z_RPNQuery *copy_RPNQuery(Z_RPNQuery *src, NMEM nmem)
754 ODR encode = odr_createmem(ODR_ENCODE);
755 ODR decode = odr_createmem(ODR_DECODE);
/* the decode step only runs when encoding succeeded */
757 if (z_RPNQuery(encode, &src, 0, 0))
760 char *buf = odr_getbuf(encode, &len, 0);
764 odr_setbuf(decode, buf, len, 0);
765 z_RPNQuery(decode, &dst, 0, 0);
768 nmem_transfer(nmem, decode->mem);
/* Deep-copy a sort key specification list with an ODR encode/decode round
   trip.  The decoder's memory is transferred into nmem.  dst starts as 0
   and is only assigned by the decode step, which runs when encoding
   succeeded. */
774 static Z_SortKeySpecList *copy_SortKeySpecList(Z_SortKeySpecList *src, NMEM nmem)
776 Z_SortKeySpecList *dst = 0;
777 ODR encode = odr_createmem(ODR_ENCODE);
778 ODR decode = odr_createmem(ODR_DECODE);
780 if (z_SortKeySpecList(encode, &src, 0, 0))
783 char *buf = odr_getbuf(encode, &len, 0);
787 odr_setbuf(decode, buf, len, 0);
788 z_SortKeySpecList(decode, &dst, 0, 0);
791 nmem_transfer(nmem, decode->mem);
/* Create (or overwrite) a result set called setname as a copy of rset.
   The database names are duplicated into the new set's nmem, the
   underlying RSET is duplicated with rset_dup, and the RPN query is
   deep-copied. */
797 ZebraSet resultSetClone(ZebraHandle zh, const char *setname,
/* third argument 1: allow overwriting an existing set of that name */
803 nset = resultSetAdd(zh, setname, 1);
807 nset->nmem = nmem_create();
809 nset->num_bases = rset->num_bases;
811 nmem_malloc(nset->nmem, nset->num_bases * sizeof(*rset->basenames));
812 for (i = 0; i<rset->num_bases; i++)
813 nset->basenames[i] = nmem_strdup(nset->nmem, rset->basenames[i]);
816 nset->rset = rset_dup(rset->rset);
818 nset->rpn = copy_RPNQuery(rset->rpn, nset->nmem);
/* Sort a result set according to sort_sequence.
   Exactly one input set is supported: zero input names and more than one
   input name each raise a Bib-1 diagnostic, as does an input set that does
   not exist or has no rset.  When the output name differs from the input
   name, the input set is cloned under the output name first.  The sort
   specification is copied into the set so it can be re-applied later, and
   the sort itself is done by resultSetSortSingle. */
822 ZEBRA_RES resultSetSort(ZebraHandle zh, NMEM nmem,
823 int num_input_setnames, const char **input_setnames,
824 const char *output_setname,
825 Z_SortKeySpecList *sort_sequence, int *sort_status)
830 if (num_input_setnames == 0)
832 zebra_setError(zh, YAZ_BIB1_NO_RESULT_SET_NAME_SUPPLIED_ON_SORT, 0);
835 if (num_input_setnames > 1)
837 zebra_setError(zh, YAZ_BIB1_SORT_TOO_MANY_INPUT_RESULTS, 0);
842 yaz_log(log_level_sort, "result set sort input=%s output=%s",
843 *input_setnames, output_setname);
844 sset = resultSetGet(zh, input_setnames[0]);
847 zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
851 if (!(rset = sset->rset))
853 zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
/* strcmp is non-zero when the names differ: sort into a clone */
857 if (strcmp(output_setname, input_setnames[0]))
858 sset = resultSetClone(zh, output_setname, sset);
859 sset->sortSpec = copy_SortKeySpecList(sort_sequence, sset->nmem);
860 return resultSetSortSingle (zh, nmem, sset, rset, sort_sequence,
/* Sort the records of rset into the sorted window of sset.
   For each of up to ZSET_SORT_MAX_LEVEL sort specs, the sort index ord and
   numeric flag are resolved per database and the relation is mapped to
   'A' (ascending) or 'D' (descending).  Unsupported specs raise a Bib-1
   diagnostic.  The rset is then read and each new sysno is passed to
   resultSetInsertSort.  *sort_status is set to Z_SortResponse_success at
   the end of the visible path. */
864 ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem,
865 ZebraSet sset, RSET rset,
866 Z_SortKeySpecList *sort_sequence,
/* database index that most recently satisfied a sort index read */
871 int cached_success_db = 0;
876 struct sortKeyInfo sort_criteria[ZSET_SORT_MAX_LEVEL];
877 char *cmp_buf[ZSET_SORT_MAX_LEVEL];
878 char *tmp_cmp_buf[ZSET_SORT_MAX_LEVEL];
/* index into key.mem where the sysno lives */
884 size_t sysno_mem_index = 0;
886 int numbases = zh->num_basenames;
887 yaz_log(log_level_sort, "searching %d databases",numbases);
889 if (zh->m_staticrank)
892 assert(nmem); /* compiler shut up about unused param */
/* start with an empty sorted window */
893 sset->sort_info->num_entries = 0;
/* first call obtains the number of terms, second call fetches them */
895 rset_getterms(rset, 0, 0, &n);
896 terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
897 rset_getterms(rset, terms, n, &numTerms);
/* silently ignore criteria beyond the supported maximum */
900 num_criteria = sort_sequence->num_specs;
901 if (num_criteria > ZSET_SORT_MAX_LEVEL)
902 num_criteria = ZSET_SORT_MAX_LEVEL;
903 // set up the search criteria
904 for (i = 0; i < num_criteria; i++)
906 Z_SortKeySpec *sks = sort_sequence->specs[i];
910 sort_criteria[i].ord = (int *)
911 nmem_malloc(nmem, sizeof(int)*numbases);
912 sort_criteria[i].numerical = (int *)
913 nmem_malloc(nmem, sizeof(int)*numbases);
915 // initialize ord and numerical for each database
916 for (ib = 0; ib < numbases; ib++)
918 sort_criteria[i].ord[ib] = -1;
919 sort_criteria[i].numerical[ib] = 0;
922 if (sks->which == Z_SortKeySpec_missingValueData)
924 zebra_setError(zh, YAZ_BIB1_UNSUPP_MISSING_DATA_ACTION, 0);
927 if (*sks->sortRelation == Z_SortKeySpec_ascending)
928 sort_criteria[i].relation = 'A';
929 else if (*sks->sortRelation == Z_SortKeySpec_descending)
930 sort_criteria[i].relation = 'D';
933 zebra_setError(zh, YAZ_BIB1_ILLEGAL_SORT_RELATION, 0);
/* only generic sort elements are supported */
936 if (sks->sortElement->which == Z_SortElement_databaseSpecific)
938 zebra_setError(zh, YAZ_BIB1_DATABASE_SPECIFIC_SORT_UNSUPP, 0);
941 else if (sks->sortElement->which != Z_SortElement_generic)
943 zebra_setError(zh, YAZ_BIB1_SORT_ILLEGAL_SORT, 0);
946 sk = sks->sortElement->u.generic;
949 case Z_SortKey_sortField:
950 yaz_log(log_level_sort, "key %d is of type sortField",
/* look up the sort index ord in every database searched */
952 for (ib = 0; ib < numbases; ib++)
954 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
955 sort_criteria[i].numerical[ib] = 0;
956 sort_criteria[i].ord[ib] =
957 zebraExplain_lookup_attr_str(zh->reg->zei,
958 zinfo_index_category_sort,
960 if (sks->which != Z_SortKeySpec_null
961 && sort_criteria[i].ord[ib] == -1)
964 YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
969 case Z_SortKey_elementSpec:
970 yaz_log(log_level_sort, "key %d is of type elementSpec",
972 zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
974 case Z_SortKey_sortAttributes:
975 yaz_log(log_level_sort, "key %d is of type sortAttributes", i+1);
976 // for every database we searched, get the sort index file id (ord)
977 // and its numerical indication and store them in the sort_criteria
978 for (ib = 0; ib < numbases; ib++)
980 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
981 res = zebra_sort_get_ord(zh, sk->u.sortAttributes,
982 &sort_criteria[i].ord[ib],
983 &sort_criteria[i].numerical[ib]);
986 if (sks->which != Z_SortKeySpec_null && res != ZEBRA_OK)
990 // right now we look up the index type based on the first database
991 // if the index_type's can differ between the indexes of different databases (which i guess they can?)
992 // then we have to store the index types for each database, just like the ord and numerical
993 if (zebraExplain_lookup_ord(zh->reg->zei, sort_criteria[i].ord[0],
994 &sort_criteria[i].index_type,
997 zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
1001 /* allocate space for each compare buf + one extra for tmp comparison */
1002 /* cmp_buf is an array of array, the first dimension is the criteria and the second dimension are
1003 all other result entries to compare against. This is slowly filled when records are processed.
1004 tmp_cmp_buf is an array with a value of the current record for each criteria
1006 for (i = 0; i<num_criteria; i++)
1008 cmp_buf[i] = xmalloc(sset->sort_info->max_entries
1009 * SORT_IDX_ENTRYSIZE);
1010 tmp_cmp_buf[i] = xmalloc(SORT_IDX_ENTRYSIZE);
// read every key of the rset; a change of sysno marks a new record
1012 rfd = rset_open(rset, RSETF_READ);
1013 while (rset_read(rfd, &key, &termid))
1015 zint this_sys = key.mem[sysno_mem_index];
1016 if (log_level_searchhits)
1017 key_logdump_txt(log_level_searchhits, &key, termid->name);
1019 if (this_sys != psysno)
// poll the break handler once every 256 hits
1021 if ((sset->hits & 255) == 0 && zh->break_handler_func)
1023 if (zh->break_handler_func(zh->break_handler_data))
1025 rset_set_hits_limit(rset, 0);
1031 resultSetInsertSort(zh, sset,
1032 sort_criteria, num_criteria, psysno, cmp_buf,
1033 tmp_cmp_buf, &cached_success_db);
1038 // free the compare buffers
1039 for (i = 0; i<num_criteria; i++)
1042 xfree(tmp_cmp_buf[i]);
1045 yaz_log(log_level_sort, ZINT_FORMAT " keys, " ZINT_FORMAT " sysnos, sort",
1047 for (i = 0; i < numTerms; i++)
1048 yaz_log(log_level_sort, "term=\"%s\" type=%s count=" ZINT_FORMAT,
1049 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
1050 *sort_status = Z_SortResponse_success;
// Look up the result set with the given id via resultSetGet (which may
// re-run its search) so that its RSET can be handed to the caller.
1054 RSET resultSetRef(ZebraHandle zh, const char *resultSetId)
1058 if ((s = resultSetGet(zh, resultSetId)))
// Rank the records of rset into the sorted window of zebraSet.
// The rank handler is named by the "rank" resource (default "rank-1").
// An unknown handler is logged as a warning and raises
// YAZ_BIB1_UNSUPP_SEARCH.  Otherwise the handler's begin/add/calc/end
// callbacks are driven over all keys of the rset, and each record's score
// is inserted with resultSetInsertRank.  zebraSet->hits is taken from
// rset->hits_count; nmem backs the term array and is passed to begin.
1063 ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet,
1064 RSET rset, NMEM nmem)
1073 ZebraRankClass rank_class;
1074 struct zset_sort_info *sort_info;
1075 const char *rank_handler_name = res_get_def(zh->res, "rank", "rank-1");
// index into key.mem where the sysno lives; with static rank, key.mem[0]
// holds the static rank and the sysno is at index 1
1076 size_t sysno_mem_index = 0;
1078 if (zh->m_staticrank)
1079 sysno_mem_index = 1;
// start with an empty sorted window
1083 sort_info = zebraSet->sort_info;
1084 sort_info->num_entries = 0;
1086 zebraSet->estimated_hit_count = 0;
// first call obtains the number of terms, second call fetches them
1087 rset_getterms(rset, 0, 0, &n);
1088 terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
1089 rset_getterms(rset, terms, n, &numTerms);
1091 rank_class = zebraRankLookup(zh, rank_handler_name);
1094 yaz_log(YLOG_WARN, "No such rank handler: %s", rank_handler_name);
1095 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "Cannot find rank handler");
1100 RSFD rfd = rset_open(rset, RSETF_READ);
1101 struct rank_control *rc = rank_class->control;
1104 void *handle = (*rc->begin) (zh->reg, rank_class->class_handle, rset,
1105 nmem, terms, numTerms);
1106 zint psysno = 0; /* previous doc id / sys no */
1107 zint pstaticrank = 0; /* previous static rank */
1109 while (rset_read(rfd, &key, &termid))
1111 zint this_sys = key.mem[sysno_mem_index];
/* the last element of the key is used as the sequence number */
1113 zint seqno = key.mem[key.len-1];
1115 if (log_level_searchhits)
1116 key_logdump_txt(log_level_searchhits, &key, termid->name);
1117 if (this_sys != psysno)
1118 { /* new record .. */
/* poll the break handler once every 256 counted items */
1119 if (!(rfd->counted_items & 255) && zh->break_handler_func)
1121 if (zh->break_handler_func(zh->break_handler_data))
1123 yaz_log(YLOG_LOG, "Aborted search");
1127 if (rfd->counted_items > rset->hits_limit)
1130 { /* only if we did have a previous record */
1131 score = (*rc->calc)(handle, psysno, pstaticrank,
1133 /* insert the hit. A=Ascending */
1134 resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1139 zebraSet->estimated_hit_count = 1;
1140 rset_set_hits_limit(rset, 0);
1144 if (zh->m_staticrank)
1145 pstaticrank = key.mem[0];
/* feed this occurrence to the rank handler */
1147 (*rc->add)(handle, CAST_ZINT_TO_INT(seqno), termid);
1151 { /* we had - at least - one record */
1152 score = (*rc->calc)(handle, psysno, pstaticrank, &stop_flag);
1153 /* insert the hit. A=Ascending */
1154 resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1157 (*rc->end)(zh->reg, handle);
1160 zebraSet->hits = rset->hits_count;
1162 yaz_log(log_level_searchterms, ZINT_FORMAT " keys, "
1163 ZINT_FORMAT " sysnos, rank", kno, zebraSet->hits);
1164 for (i = 0; i < numTerms; i++)
1166 yaz_log(log_level_searchterms, "term=\"%s\" type=%s count="
1168 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
/* Find the rank class whose control name equals name in the register's
   rank class list.  A class found with init_flag unset gets its create
   callback invoked (when one is provided) to obtain its class handle. */
1173 ZebraRankClass zebraRankLookup(ZebraHandle zh, const char *name)
1175 ZebraRankClass p = zh->reg->rank_classes;
/* advance while the current class does not match */
1176 while (p && strcmp(p->control->name, name))
1178 if (p && !p->init_flag)
1180 if (p->control->create)
1181 p->class_handle = (*p->control->create)(zh);
/* Register a rank handler with the register.  The control structure and
   its name are copied, so the caller keeps ownership of ctrl.  The new
   class is pushed onto the front of reg->rank_classes. */
1187 void zebraRankInstall(struct zebra_register *reg, struct rank_control *ctrl)
1189 ZebraRankClass p = (ZebraRankClass) xmalloc(sizeof(*p));
1190 p->control = (struct rank_control *) xmalloc(sizeof(*p->control));
1191 memcpy(p->control, ctrl, sizeof(*p->control));
/* replace the copied name pointer with a private duplicate */
1192 p->control->name = xstrdup(ctrl->name);
1194 p->next = reg->rank_classes;
1195 reg->rank_classes = p;
/* Tear down all rank classes of the register.  Classes that were
   initialised have their destroy callback invoked (when provided).  The
   duplicated control name is freed and the list head is reset to NULL. */
1198 void zebraRankDestroy(struct zebra_register *reg)
1200 ZebraRankClass p = reg->rank_classes;
/* save the successor before the current node is released */
1203 ZebraRankClass p_next = p->next;
1204 if (p->init_flag && p->control->destroy)
1205 (*p->control->destroy)(reg, p->class_handle);
1206 xfree(p->control->name);
1211 reg->rank_classes = NULL;
/* Recursively walk rset and its children, collecting per-term data.
   Each output array may be null, in which case that value is not
   collected; passing all three as null just counts the terms.
   The return value is accumulated from the counts returned for the
   children and is used by callers as the number of terms. */
1214 static int trav_rset_for_termids(RSET rset, TERMID *termid_array,
1215 zint *hits_array, int *approx_array)
/* each child writes after the slots already used by its siblings */
1219 for (i = 0; i<rset->no_children; i++)
1220 no += trav_rset_for_termids(rset->children[i],
1221 (termid_array ? termid_array + no : 0),
1222 (hits_array ? hits_array + no : 0),
1223 (approx_array ? approx_array + no : 0));
1227 termid_array[no] = rset->term;
1229 hits_array[no] = rset->hits_count;
1231 approx_array[no] = rset->hits_approx;
1233 yaz_log(YLOG_LOG, "rset=%p term=%s limit=" ZINT_FORMAT
1234 " count=" ZINT_FORMAT,
1235 rset, rset->term->name, rset->hits_limit, rset->hits_count);
/* Store the number of terms of the named result set in *num_terms, using
   the count-only mode of trav_rset_for_termids. */
1242 ZEBRA_RES zebra_result_set_term_no(ZebraHandle zh, const char *setname,
1245 ZebraSet sset = resultSetGet(zh, setname);
1249 *num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
/* Report information about term number no (0-based) of the named result
   set: its hit count, approximation flag, term string and reference id.
   termbuf/*termlen: on entry *termlen is the buffer size; on the paths
   shown it is updated to the length written and the buffer is
   NUL-terminated.  When zh->iconv_from_utf8 is set the term is converted
   with yaz_iconv; otherwise it is copied with memcpy. */
1255 ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname,
1256 int no, zint *count, int *approx,
1257 char *termbuf, size_t *termlen,
1258 const char **term_ref_id)
1260 ZebraSet sset = resultSetGet(zh, setname);
/* first pass counts the terms so the arrays can be sized */
1263 int num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1264 if (no >= 0 && no < num_terms)
1266 TERMID *term_array = xmalloc(num_terms * sizeof(*term_array));
1267 zint *hits_array = xmalloc(num_terms * sizeof(*hits_array));
1268 int *approx_array = xmalloc(num_terms * sizeof(*approx_array));
/* second pass fills the arrays */
1270 trav_rset_for_termids(sset->rset, term_array,
1271 hits_array, approx_array);
1274 *count = hits_array[no];
1276 *approx = approx_array[no];
1279 char *inbuf = term_array[no]->name;
1280 size_t inleft = strlen(inbuf);
/* keep one byte free for the terminating NUL */
1281 size_t outleft = *termlen - 1;
1283 if (zh->iconv_from_utf8 != 0)
1285 char *outbuf = termbuf;
1288 ret = yaz_iconv(zh->iconv_from_utf8, &inbuf, &inleft,
1290 if (ret == (size_t)(-1))
1294 yaz_iconv(zh->iconv_from_utf8, 0, 0,
1296 *termlen = outbuf - termbuf;
1301 if (inleft > outleft)
1304 memcpy(termbuf, inbuf, *termlen);
1306 termbuf[*termlen] = '\0';
1309 *term_ref_id = term_array[no]->ref_id;
1313 xfree(approx_array);
/* Collect the term hits of one record (sysno) of the named result set
   into snippets.
   A temporary rset is written with a key for the record and AND-combined
   with a duplicate of the set's rset.  Every key read from the
   combination is appended to snippets once per ord in the term's ord
   list. */
1320 ZEBRA_RES zebra_snippets_hit_vector(ZebraHandle zh, const char *setname,
1321 zint sysno, zebra_snippets *snippets)
1323 ZebraSet sset = resultSetGet(zh, setname);
1324 yaz_log(YLOG_DEBUG, "zebra_get_hit_vector setname=%s zysno=" ZINT_FORMAT,
1330 struct rset_key_control *kc = zebra_key_control_create(zh);
/* private pool for the temporary rsets */
1331 NMEM nmem = nmem_create();
1333 RSET rsets[2], rset_comb;
1334 RSET rset_temp = rset_create_temp(nmem, kc, kc->scope,
1335 res_get(zh->res, "setTmpDir"),0 );
1338 RSFD rsfd = rset_open(rset_temp, RSETF_WRITE);
1345 rset_write(rsfd, &key);
1348 rsets[0] = rset_temp;
1349 rsets[1] = rset_dup(sset->rset);
1351 rset_comb = rset_create_and(nmem, kc, kc->scope, 2, rsets);
1353 rsfd = rset_open(rset_comb, RSETF_READ);
1355 while (rset_read(rsfd, &key, &termid))
1359 struct ord_list *ol;
1360 for (ol = termid->ol; ol; ol = ol->next)
/* the last element of the key is passed as the position */
1362 zebra_snippets_append(snippets, key.mem[key.len-1], 0,
1363 ol->ord, termid->name);
1369 rset_delete(rset_comb);
/* Map a record id to system numbers by scanning the "_ALLRECORDS" index
   of each given database.
   sysnos/*no_sysnos: on entry *no_sysnos is the capacity of sysnos; on
   return it holds the number of entries stored.  Results beyond the
   capacity are not stored.  The index scan is only used when the register
   has an isamb and segment indexing is enabled; the first branch handles
   the other case. */
1376 static ZEBRA_RES zebra_recid_to_sysno(ZebraHandle zh,
1377 const char **basenames, int num_bases,
1379 zint *sysnos, int *no_sysnos)
1381 ZEBRA_RES res = ZEBRA_OK;
/* number of entries written to sysnos so far */
1382 int sysnos_offset = 0;
1385 if (!zh->reg->isamb || !zh->m_segment_indexing)
1387 if (sysnos_offset < *no_sysnos)
/* stop at the first database that yields an error */
1393 for (i = 0; res == ZEBRA_OK && i < num_bases; i++)
1395 const char *database = basenames[i];
1396 if (zebraExplain_curDatabase(zh->reg->zei, database) == 0)
1398 const char *index_type = "w";
1399 const char *use_string = "_ALLRECORDS";
1401 zinfo_index_category_t cat = zinfo_index_category_alwaysmatches;
1402 ord = zebraExplain_lookup_attr_str(zh->reg->zei, cat,
1403 index_type, use_string);
/* the dictionary key is the encoded ord, NUL-terminated */
1407 int ord_len = key_SU_encode(ord, ord_buf);
1410 ord_buf[ord_len] = '\0';
1412 info = dict_lookup(zh->reg->dict, ord_buf);
/* the first byte of the dictionary info is compared with the size of an
   ISAM_P; the ISAM_P itself follows at info+1 */
1415 if (*info != sizeof(ISAM_P))
1423 struct it_key key_until, key_found;
1427 memcpy(&isam_p, info+1, sizeof(ISAM_P));
1429 pt = isamb_pp_open(zh->reg->isamb, isam_p, 2);
/* build the key to seek to: recid followed by zeroed components */
1434 key_until.mem[i++] = recid;
1435 key_until.mem[i++] = 0; /* section_id */
1436 if (zh->m_segment_indexing)
1437 key_until.mem[i++] = 0; /* segment */
1438 key_until.mem[i++] = 0;
/* seek forward, then collect every key that still belongs to recid */
1441 r = isamb_pp_forward(pt, &key_found, &key_until);
1442 while (r && key_found.mem[0] == recid)
1444 if (sysnos_offset < *no_sysnos)
1445 sysnos[sysnos_offset++] =
1446 key_found.mem[key_found.len-1];
1447 r = isamb_pp_read(pt, &key_found);
1457 *no_sysnos = sysnos_offset;
/* Map a record id to system numbers using the databases that the named
   result set was searched in.  The database names are fetched with
   resultSetGetBaseNames; a failure there is checked before the lookup is
   delegated to zebra_recid_to_sysno. */
1461 ZEBRA_RES zebra_result_recid_to_sysno(ZebraHandle zh,
1462 const char *setname,
1464 zint *sysnos, int *no_sysnos)
1466 const char **basenames;
1470 res = resultSetGetBaseNames(zh, setname, &basenames, &num_bases);
1471 if (res != ZEBRA_OK)
1474 return zebra_recid_to_sysno(zh, basenames, num_bases,
1475 recid, sysnos, no_sysnos);
/* Count the hits of rset by reading through it.  The rset's hits_limit is
   set to approx_limit first.  A new record is detected by a change in
   key.mem[0], and the limit check below is applied to the read handle's
   counted_items.  *count receives rset->hits_count.
   NOTE(review): key.mem[0] is used as the record id here regardless of
   zh->m_staticrank, unlike the other readers in this file - confirm. */
1478 void zebra_count_set(ZebraHandle zh, RSET rset, zint *count,
1485 yaz_log(YLOG_DEBUG, "count_set");
1487 rset->hits_limit = approx_limit;
1490 rfd = rset_open(rset, RSETF_READ);
1491 while (rset_read(rfd, &key,0 /* never mind terms */))
1493 if (key.mem[0] != psysno)
1495 psysno = key.mem[0];
1496 if (rfd->counted_items >= rset->hits_limit)
1501 *count = rset->hits_count;
1508 * indent-tabs-mode: nil
1510 * vim: shiftwidth=4 tabstop=8 expandtab