1 /* $Id: rpnsearch.c,v 1.27 2007-12-07 14:09:09 adam Exp $
2 Copyright (C) 1995-2007
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
/* Logging state for this file.  log_level_rpn is assigned from
   yaz_log_module_level("rpn") inside add_isam_p(); log_level_set
   presumably guards that one-time initialisation - confirm. */
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
/* NOTE(review): no use of TERMSET_DISABLE is visible in this excerpt. */
43 #define TERMSET_DISABLE 1
/* Character-map callback registered with dict_grep_cmap() by
   rpn_char_map_prepare().
   vp   - the struct rpn_char_map_info passed at registration
   from - in/out pointer to the input text
   len  - length of the input
   The input is mapped with zebra_maps_input() (last argument 0); the
   yaz_log() calls below log bytes of the first mapped string in hex. */
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
47 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48 const char **out = zebra_maps_input(p->zm, from, len, 0);
52 const char *outp = *out;
53 yaz_log(YLOG_LOG, "---");
56 yaz_log(YLOG_LOG, "%02X", *outp);
/* Selects the character-map callback used by dictionary grep on
   reg->dict.  For an ICU map the callback is cleared
   (dict_grep_cmap(dict, 0, 0)); the second call installs
   rpn_char_map_handler with map_info as its argument (presumably the
   else branch - the line in between is not visible here). */
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65 struct rpn_char_map_info *map_info)
68 if (zebra_maps_is_icu(zm))
69 dict_grep_cmap(reg->dict, 0, 0);
71 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
85 const char *index_type;
/* Records one dictionary hit in the grep_info accumulator.
   name - dictionary key; it begins with an encoded ordinal that is
          decoded below with key_SU_decode()
   info - length-prefixed payload: info[0] must equal the size of one
          isam_p_buf entry, the entry itself starts at info+1
   The isam_p_buf and term_no arrays are grown on demand. */
89 static int add_isam_p(const char *name, const char *info,
94 log_level_rpn = yaz_log_module_level("rpn");
97 /* we may have to stop this madness.. NOTE: -1 so that if
98 truncmax == trunxlimit we do *not* generate result sets */
99 if (p->isam_p_indx >= p->trunc_max - 1)
/* buffer full: grow both arrays to 2*size+100 entries */
102 if (p->isam_p_indx == p->isam_p_size)
104 ISAM_P *new_isam_p_buf;
108 p->isam_p_size = 2*p->isam_p_size + 100;
109 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
113 memcpy(new_isam_p_buf, p->isam_p_buf,
114 p->isam_p_indx * sizeof(*p->isam_p_buf));
115 xfree(p->isam_p_buf);
117 p->isam_p_buf = new_isam_p_buf;
120 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* NOTE(review): the copy into new_term_no reads from p->isam_p_buf,
   while the byte count uses sizeof(*p->term_no).  This looks like it
   should copy from p->term_no - confirm and fix. */
123 memcpy(new_term_no, p->isam_p_buf,
124 p->isam_p_indx * sizeof(*p->term_no));
127 p->term_no = new_term_no;
/* store the new entry at index isam_p_indx */
130 assert(*info == sizeof(*p->isam_p_buf));
131 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
/* term reporting: decode the ordinal, convert the term back to display
   form, look up database/index name and add the term to p->termset */
136 char term_tmp[IT_MAX_WORD];
138 const char *index_name;
139 int len = key_SU_decode(&ord, (const unsigned char *) name);
141 zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143 zebraExplain_lookup_ord(p->zh->reg->zei,
144 ord, 0 /* index_type */, &db, &index_name);
145 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
147 resultSetAddTerm(p->zh, p->termset, name[len], db,
148 index_name, term_tmp);
/* dict_lookup_grep() callback: forwards the hit to add_isam_p(), with
   the opaque pointer p cast back to struct grep_info. */
154 static int grep_handle(char *name, const char *info, void *p)
156 return add_isam_p(name, info, (struct grep_info *) p);
/* Prepares *src for term parsing by skipping white space.
   ct1, ct2 - optional (may be NULL) sets of characters; the current
              character is tested against each with strchr()
   first    - passed through to zebra_maps_input()
   A character whose mapping differs from CHR_SPACE is also tested
   for.  What happens on each match is not visible in this excerpt. */
159 static int term_pre(zebra_map_t zm, const char **src,
160 const char *ct1, const char *ct2, int first)
162 const char *s1, *s0 = *src;
165 /* skip white space */
168 if (ct1 && strchr(ct1, *s0))
170 if (ct2 && strchr(ct2, *s0))
173 map = zebra_maps_input(zm, &s1, strlen(s1), first);
174 if (**map != *CHR_SPACE)
/* Formats in_buf[0..in_size) into out_buf as "HH:c " groups (hex value
   and a display character) for logging.
   out_size must exceed 20 (asserted).  Once the text grows past
   out_size-20 bytes, ".." is appended.
   NOTE(review): each step appends with sprintf/strcat at
   strlen(out_buf); the 20-byte margin is the only visible guard
   against overrun. */
183 static void esc_str(char *out_buf, size_t out_size,
184 const char *in_buf, int in_size)
190 assert(out_size > 20);
192 for (k = 0; k<in_size; k++)
/* mask to 0..255 so bytes with the high bit set are not negative */
194 int c = in_buf[k] & 0xff;
/* outside the printable ASCII range */
196 if (c < 32 || c > 126)
200 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
201 if (strlen(out_buf) > out_size-20)
203 strcat(out_buf, "..");
/* Characters that are prefixed with a backslash when copied into the
   dictionary regular expression (see the strchr(REGEX_CHARS, ...)
   tests in the term_* helpers). */
209 #define REGEX_CHARS " []()|.*+?!"
/* Appends one non-space unit of the query term.
   start, end - source bytes [start, end); copied verbatim to
                dst_term at offset *dst_ptr
   map        - mapped form of the unit; map[0] is written to
                term_dict in one branch
   The other visible branch writes source characters to term_dict,
   backslash-escaping those in REGEX_CHARS.  The branch choice
   presumably depends on q_map_match - confirm.
   NOTE(review): the memcpy into dst_term has no visible bounds check. */
211 static void add_non_space(const char *start, const char *end,
213 char *dst_term, int *dst_ptr,
214 const char **map, int q_map_match)
216 size_t sz = end - start;
217 memcpy(dst_term + *dst_ptr, start, sz);
223 if (strchr(REGEX_CHARS, *start))
224 wrbuf_putc(term_dict, '\\');
225 wrbuf_putc(term_dict, *start);
232 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
234 wrbuf_puts(term_dict, map[0]);
/* ICU variant of term_100: takes the next token from the tokenizer of
   zm (zebra_map_tokenize_next) and writes it to term_dict.  Bytes
   found in REGEX_CHARS get a backslash; a byte with value 1 is also
   written before a token byte (the condition for it is not visible
   here).
   NOTE(review): strcat(dst_term, *src) has no visible bounds check. */
239 static int term_100_icu(zebra_map_t zm,
240 const char **src, WRBUF term_dict, int space_split,
244 const char *res_buf = 0;
246 if (!zebra_map_tokenize_next(zm, &res_buf, &res_len))
248 *src += strlen(*src);
251 strcat(dst_term, *src);
252 for (i = 0; i < res_len; i++)
254 if (strchr(REGEX_CHARS, res_buf[i]))
255 wrbuf_putc(term_dict, '\\');
257 wrbuf_putc(term_dict, 1);
258 wrbuf_putc(term_dict, res_buf[i]);
263 /* term_100: handle term, where trunc = none(no operators at all) */
/* Delegates to term_100_icu() for ICU maps.  Otherwise the term is
   read unit by unit with zebra_maps_search(): non-space units go
   through add_non_space(); a run of space is remembered in
   space_start/space_end and only written out (to both dst_term and
   term_dict, with REGEX_CHARS escaped) when more text follows. */
264 static int term_100(zebra_map_t zm,
265 const char **src, WRBUF term_dict, int space_split,
273 const char *space_start = 0;
274 const char *space_end = 0;
276 if (zebra_maps_is_icu(zm))
277 return term_100_icu(zm, src, term_dict, space_split, dst_term);
279 if (!term_pre(zm, src, NULL, NULL, !space_split))
286 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
289 if (**map == *CHR_SPACE)
292 else /* complete subfield only. */
294 if (**map == *CHR_SPACE)
295 { /* save space mapping for later .. */
300 else if (space_start)
301 { /* reload last space */
302 while (space_start < space_end)
304 if (strchr(REGEX_CHARS, *space_start))
305 wrbuf_putc(term_dict, '\\');
306 dst_term[j++] = *space_start;
307 wrbuf_putc(term_dict, *space_start);
312 space_start = space_end = 0;
317 add_non_space(s1, s0, term_dict, dst_term, &j,
325 /* term_101: handle term, where trunc = Process # */
/* A special character in the term is written to term_dict as ".*" and
   copied to dst_term (presumably the '#' named above - the test is not
   visible).  Other units go through add_non_space(); with space_split
   set, a unit mapping to CHR_SPACE is tested for (presumably ending
   the term).  dst_term is NUL-terminated at the end. */
326 static int term_101(zebra_map_t zm,
327 const char **src, WRBUF term_dict, int space_split,
335 if (!term_pre(zm, src, "#", "#", !space_split))
343 wrbuf_puts(term_dict, ".*");
344 dst_term[j++] = *s0++;
350 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
351 if (space_split && **map == *CHR_SPACE)
355 add_non_space(s1, s0, term_dict, dst_term, &j,
359 dst_term[j++] = '\0';
364 /* term_103: handle term, where trunc = re-2 (regular expressions) */
/* errors - optional out parameter (term_102 passes NULL).  When it is
            non-NULL and the term starts with '+', a digit, '+', it is
            set to the value of that digit.
   Characters listed in the strchr() set below are written to term_dict
   unchanged; other units go through add_non_space(). */
365 static int term_103(zebra_map_t zm, const char **src,
366 WRBUF term_dict, int *errors, int space_split,
374 if (!term_pre(zm, src, "^\\()[].*+?|", "(", !space_split))
377 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
378 isdigit(((const unsigned char *)s0)[1]))
380 *errors = s0[1] - '0';
387 if (strchr("^\\()[].*+?|-", *s0))
390 wrbuf_putc(term_dict, *s0);
398 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
399 if (space_split && **map == *CHR_SPACE)
403 add_non_space(s1, s0, term_dict, dst_term, &j,
413 /* term_102: handle term, where trunc = re-1 (regular expressions) */
/* Same as term_103 but with errors == NULL, so no error count is
   parsed from the term. */
414 static int term_102(zebra_map_t zm, const char **src,
415 WRBUF term_dict, int space_split, char *dst_term)
417 return term_103(zm, src, term_dict, NULL, space_split, dst_term);
421 /* term_104: handle term, process # and ! */
/* NOTE(review): the character set handed to term_pre() is "?*#",
   which does not match the "# and !" wording above - confirm which is
   current.
   One special character may be followed by decimal digits, which are
   accumulated into limit; ".?" and ".*" are the visible outputs for
   that case.  Other special characters produce ".*" or ".".  Ordinary
   units go through add_non_space().  dst_term receives the original
   characters and is NUL-terminated at the end. */
422 static int term_104(zebra_map_t zm, const char **src,
423 WRBUF term_dict, int space_split, char *dst_term)
430 if (!term_pre(zm, src, "?*#", "?*#", !space_split))
438 dst_term[j++] = *s0++;
439 if (*s0 >= '0' && *s0 <= '9')
442 while (*s0 >= '0' && *s0 <= '9')
444 limit = limit * 10 + (*s0 - '0');
445 dst_term[j++] = *s0++;
451 wrbuf_puts(term_dict, ".?");
456 wrbuf_puts(term_dict, ".*");
462 wrbuf_puts(term_dict, ".*");
463 dst_term[j++] = *s0++;
468 wrbuf_puts(term_dict, ".");
469 dst_term[j++] = *s0++;
475 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
476 if (space_split && **map == *CHR_SPACE)
480 add_non_space(s1, s0, term_dict, dst_term, &j,
484 dst_term[j++] = '\0';
489 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
/* right_truncate - string_term() passes 1 for truncation 105 and 0 for
                    106; presumably it controls the trailing ".*"
                    written near the end - confirm.
   Special characters produce ".*" or "." in term_dict (presumably '*'
   and '!' respectively); ordinary units go through add_non_space().
   dst_term is NUL-terminated at the end. */
490 static int term_105(zebra_map_t zm, const char **src,
491 WRBUF term_dict, int space_split,
492 char *dst_term, int right_truncate)
499 if (!term_pre(zm, src, "*!", "*!", !space_split))
507 wrbuf_puts(term_dict, ".*");
508 dst_term[j++] = *s0++;
513 wrbuf_putc(term_dict, '.');
514 dst_term[j++] = *s0++;
520 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
521 if (space_split && **map == *CHR_SPACE)
525 add_non_space(s1, s0, term_dict, dst_term, &j,
530 wrbuf_puts(term_dict, ".*");
531 dst_term[j++] = '\0';
/* Builds the expression in the local buffer dst_buf (20*5*20 bytes)
   with strcpy/strcat/sprintf and appends the result to term_dict.
   NOTE(review): none of the visible writes into the buffer is
   bounds-checked; the size relies on the "assuming enough" remark. */
537 /* gen_regular_rel - generate regular expression from relation
538 * val: border value (inclusive)
539 * islt: 1 if <=; 0 if >=.
541 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
543 char dst_buf[20*5*20]; /* assuming enough for expansion */
550 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
554 strcpy(dst, "(-[0-9]+|(");
562 strcpy(dst, "([0-9]+|-(");
573 sprintf(numstr, "%d", val);
574 for (w = strlen(numstr); --w >= 0; pos++)
593 strcpy(dst + dst_p, numstr);
594 dst_p = strlen(dst) - pos - 1;
622 for (i = 0; i<pos; i++)
635 /* match everything less than 10^(pos-1) */
637 for (i = 1; i<pos; i++)
638 strcat(dst, "[0-9]?");
642 /* match everything greater than 10^pos */
643 for (i = 0; i <= pos; i++)
644 strcat(dst, "[0-9]");
645 strcat(dst, "[0-9]*");
648 wrbuf_puts(term_dict, dst);
/* Copies one character of wsrc, starting at index *indx, to term_p.
   A backslash at that index is copied first and then a further
   character is copied (an escape pair).  *indx is presumably advanced
   past what was copied; the increments are not visible here. */
651 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
653 const char *src = wrbuf_cstr(wsrc);
654 if (src[*indx] == '\\')
656 wrbuf_putc(term_p, src[*indx]);
659 wrbuf_putc(term_p, src[*indx]);
664 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
665 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
666 * >= abc ([b-].*|a[c-].*|ab[c-].*)
667 * ([^-a].*|a[^-b].*|ab[c-].*)
668 * < abc ([-0].*|a[-a].*|ab[-b].*)
669 * ([^a-].*|a[^b-].*|ab[^c-].*)
670 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
671 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* Translates the relation attribute (type 2) of zapt into a regular
   expression appended to term_dict.
   The term text is first expanded by term_100() into the scratch
   WRBUF term_component, which is destroyed on every visible exit
   path.  For the ordering relations a parenthesised alternation is
   generated, one alternative per prefix of the term, each ending in a
   bracket expression followed by ".*".  An unsupported relation
   stores YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE in *error_code.
   The case labels are not visible in this excerpt; the log messages
   identify each branch. */
673 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
674 const char **term_sub, WRBUF term_dict,
675 const Odr_oid *attributeSet,
676 zebra_map_t zm, int space_split, char *term_dst,
682 WRBUF term_component = wrbuf_alloc();
684 attr_init_APT(&relation, zapt, 2);
685 relation_value = attr_find(&relation, NULL);
688 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
689 switch (relation_value)
692 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
694 wrbuf_destroy(term_component);
697 yaz_log(log_level_rpn, "Relation <");
699 wrbuf_putc(term_dict, '(');
700 for (i = 0; i < wrbuf_len(term_component); )
705 wrbuf_putc(term_dict, '|');
707 string_rel_add_char(term_dict, term_component, &j);
709 wrbuf_putc(term_dict, '[');
711 wrbuf_putc(term_dict, '^');
713 wrbuf_putc(term_dict, 1);
714 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
716 string_rel_add_char(term_dict, term_component, &i);
717 wrbuf_putc(term_dict, '-');
719 wrbuf_putc(term_dict, ']');
720 wrbuf_putc(term_dict, '.');
721 wrbuf_putc(term_dict, '*');
723 wrbuf_putc(term_dict, ')');
726 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
728 wrbuf_destroy(term_component);
731 yaz_log(log_level_rpn, "Relation <=");
733 wrbuf_putc(term_dict, '(');
734 for (i = 0; i < wrbuf_len(term_component); )
739 string_rel_add_char(term_dict, term_component, &j);
740 wrbuf_putc(term_dict, '[');
742 wrbuf_putc(term_dict, '^');
744 wrbuf_putc(term_dict, 1);
745 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
747 string_rel_add_char(term_dict, term_component, &i);
748 wrbuf_putc(term_dict, '-');
750 wrbuf_putc(term_dict, ']');
751 wrbuf_putc(term_dict, '.');
752 wrbuf_putc(term_dict, '*');
754 wrbuf_putc(term_dict, '|');
/* final alternative: the complete term itself (the "equal" part) */
756 for (i = 0; i < wrbuf_len(term_component); )
757 string_rel_add_char(term_dict, term_component, &i);
758 wrbuf_putc(term_dict, ')');
761 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
763 wrbuf_destroy(term_component);
766 yaz_log(log_level_rpn, "Relation >");
768 wrbuf_putc(term_dict, '(');
769 for (i = 0; i < wrbuf_len(term_component); )
774 string_rel_add_char(term_dict, term_component, &j);
775 wrbuf_putc(term_dict, '[');
777 wrbuf_putc(term_dict, '^');
778 wrbuf_putc(term_dict, '-');
779 string_rel_add_char(term_dict, term_component, &i);
781 wrbuf_putc(term_dict, ']');
782 wrbuf_putc(term_dict, '.');
783 wrbuf_putc(term_dict, '*');
785 wrbuf_putc(term_dict, '|');
/* final alternative: the complete term followed by at least one more
   character (".+") */
787 for (i = 0; i < wrbuf_len(term_component); )
788 string_rel_add_char(term_dict, term_component, &i);
789 wrbuf_putc(term_dict, '.');
790 wrbuf_putc(term_dict, '+');
791 wrbuf_putc(term_dict, ')');
794 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
796 wrbuf_destroy(term_component);
799 yaz_log(log_level_rpn, "Relation >=");
801 wrbuf_putc(term_dict, '(');
802 for (i = 0; i < wrbuf_len(term_component); )
807 wrbuf_putc(term_dict, '|');
809 string_rel_add_char(term_dict, term_component, &j);
810 wrbuf_putc(term_dict, '[');
/* the last position is handled differently from the earlier ones */
812 if (i < wrbuf_len(term_component)-1)
814 wrbuf_putc(term_dict, '^');
815 wrbuf_putc(term_dict, '-');
816 string_rel_add_char(term_dict, term_component, &i);
820 string_rel_add_char(term_dict, term_component, &i);
821 wrbuf_putc(term_dict, '-');
823 wrbuf_putc(term_dict, ']');
824 wrbuf_putc(term_dict, '.');
825 wrbuf_putc(term_dict, '*');
827 wrbuf_putc(term_dict, ')');
834 yaz_log(log_level_rpn, "Relation =");
835 if (!term_100(zm, term_sub, term_component, space_split, term_dst))
837 wrbuf_destroy(term_component);
/* equality: the expanded term wrapped in parentheses */
840 wrbuf_puts(term_dict, "(");
841 wrbuf_puts(term_dict, wrbuf_cstr(term_component));
842 wrbuf_puts(term_dict, ")");
845 yaz_log(log_level_rpn, "Relation always matches");
846 /* skip to end of term (we don't care what it is) */
847 while (**term_sub != '\0')
851 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
852 wrbuf_destroy(term_component);
855 wrbuf_destroy(term_component);
/* Forward declaration of string_term(); the definition appears later
   in the file, after search_term() which calls it. */
859 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
860 const char **term_sub,
862 const Odr_oid *attributeSet, NMEM stream,
863 struct grep_info *grep_info,
864 const char *index_type, int complete_flag,
866 const char *xpath_use,
867 struct ord_list **ol,
/* Extracts the hits-limit (attribute type 11) and term reference id
   (attribute type 10) from zapt.
   hits_limit_value - out: effective limit.  -1 (not given) becomes
                      zh->approx_limit when a term reference id is
                      present and 0 otherwise; an explicit 0 becomes
                      zh->approx_limit.
   term_ref_id_str  - out: reference id as a string.  A numeric id
                      (>= 0) is formatted into a 20-byte buffer
                      allocated from nmem. */
870 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
871 Z_AttributesPlusTerm *zapt,
872 zint *hits_limit_value,
873 const char **term_ref_id_str,
876 AttrType term_ref_id_attr;
877 AttrType hits_limit_attr;
880 attr_init_APT(&hits_limit_attr, zapt, 11);
881 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
883 attr_init_APT(&term_ref_id_attr, zapt, 10);
884 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
885 if (term_ref_id_int >= 0)
887 char *res = nmem_malloc(nmem, 20);
888 sprintf(res, "%d", term_ref_id_int);
889 *term_ref_id_str = res;
892 /* no limit given ? */
893 if (*hits_limit_value == -1)
895 if (*term_ref_id_str)
897 /* use global if term_ref is present */
898 *hits_limit_value = zh->approx_limit;
902 /* no counting if term_ref is not present */
903 *hits_limit_value = 0;
906 else if (*hits_limit_value == 0)
908 /* 0 is the same as global limit */
909 *hits_limit_value = zh->approx_limit;
911 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
912 *term_ref_id_str ? *term_ref_id_str : "none",
/* Expands one term from *term_sub with string_term() and converts the
   collected ISAM positions into a result set with rset_trunc()
   (positions preserved).  grep_info->isam_p_indx is reset to 0 before
   the expansion; term_dict is a scratch WRBUF allocated and destroyed
   here; the hit limit and term reference id come from
   zebra_term_limits_APT(). */
917 /** \brief search for term (which may be truncated)
919 static ZEBRA_RES search_term(ZebraHandle zh,
920 Z_AttributesPlusTerm *zapt,
921 const char **term_sub,
922 const Odr_oid *attributeSet, NMEM stream,
923 struct grep_info *grep_info,
924 const char *index_type, int complete_flag,
926 const char *rank_type,
927 const char *xpath_use,
930 struct rset_key_control *kc,
935 zint hits_limit_value;
936 const char *term_ref_id_str = 0;
937 WRBUF term_dict = wrbuf_alloc();
939 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
941 grep_info->isam_p_indx = 0;
942 res = string_term(zh, zapt, term_sub, term_dict,
943 attributeSet, stream, grep_info,
944 index_type, complete_flag,
945 term_dst, xpath_use, &ol, zm);
946 wrbuf_destroy(term_dict);
949 if (!*term_sub) /* no more terms ? */
951 yaz_log(log_level_rpn, "term: %s", term_dst);
952 *rset = rset_trunc(zh, grep_info->isam_p_buf,
953 grep_info->isam_p_indx, term_dst,
954 strlen(term_dst), rank_type, 1 /* preserve pos */,
955 zapt->term->which, rset_nmem,
956 kc, kc->scope, ol, index_type, hits_limit_value,
/* Builds the dictionary regular expression for one term and runs
   dict_lookup_grep() with it; hits are accumulated in grep_info.
   The expression starts with the encoded index ordinal, each byte
   preceded by the internal escape byte 1, in parentheses, followed by
   a group produced according to the truncation attribute (type 5).
   ol receives an ord_list holding the ordinal.  space_split is the
   inverse of complete_flag. */
963 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
964 const char **term_sub,
966 const Odr_oid *attributeSet, NMEM stream,
967 struct grep_info *grep_info,
968 const char *index_type, int complete_flag,
970 const char *xpath_use,
971 struct ord_list **ol,
976 int truncation_value;
978 struct rpn_char_map_info rcmi;
980 int space_split = complete_flag ? 0 : 1;
983 int max_pos, prefix_len = 0;
988 *ol = ord_list_create(stream);
990 rpn_char_map_prepare(zh->reg, zm, &rcmi);
991 attr_init_APT(&truncation, zapt, 5);
992 truncation_value = attr_find(&truncation, NULL);
993 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
995 termp = *term_sub; /* start of term for each database */
997 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
998 attributeSet, &ord) != ZEBRA_OK)
1004 wrbuf_rewind(term_dict); /* new dictionary regexp term */
1006 *ol = ord_list_append(stream, *ol, ord);
1007 ord_len = key_SU_encode(ord, ord_buf);
1009 wrbuf_putc(term_dict, '(');
1011 for (i = 0; i<ord_len; i++)
1013 wrbuf_putc(term_dict, 1); /* our internal regexp escape char */
1014 wrbuf_putc(term_dict, ord_buf[i]);
1016 wrbuf_putc(term_dict, ')');
/* remember where the term part starts (used for the debug dump) */
1018 prefix_len = wrbuf_len(term_dict);
1020 switch (truncation_value)
1022 case -1: /* not specified */
1023 case 100: /* do not truncate */
1024 if (!string_relation(zh, zapt, &termp, term_dict,
1026 zm, space_split, term_dst,
1031 zebra_setError(zh, relation_error, 0);
1038 case 1: /* right truncation */
1039 wrbuf_putc(term_dict, '(');
1040 if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1045 wrbuf_puts(term_dict, ".*)");
1047 case 2: /* left truncation */
1048 wrbuf_puts(term_dict, "(.*");
1049 if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1054 wrbuf_putc(term_dict, ')');
1056 case 3: /* left&right truncation */
1057 wrbuf_puts(term_dict, "(.*");
1058 if (!term_100(zm, &termp, term_dict, space_split, term_dst))
1063 wrbuf_puts(term_dict, ".*)");
1065 case 101: /* process # in term */
1066 wrbuf_putc(term_dict, '(');
1067 if (!term_101(zm, &termp, term_dict, space_split, term_dst))
1072 wrbuf_puts(term_dict, ")");
1074 case 102: /* Regexp-1 */
1075 wrbuf_putc(term_dict, '(');
1076 if (!term_102(zm, &termp, term_dict, space_split, term_dst))
1081 wrbuf_putc(term_dict, ')');
1083 case 103: /* Regexp-2 */
1085 wrbuf_putc(term_dict, '(');
/* NOTE(review): the argument after term_dict on the next line looks
   like a mis-encoded "&regex_range" (regex_range is what is later
   passed to dict_lookup_grep) - repair the encoding. */
1086 if (!term_103(zm, &termp, term_dict, ®ex_range,
1087 space_split, term_dst))
1092 wrbuf_putc(term_dict, ')');
1094 case 104: /* process # and ! in term */
1095 wrbuf_putc(term_dict, '(');
1096 if (!term_104(zm, &termp, term_dict, space_split, term_dst))
1101 wrbuf_putc(term_dict, ')');
1103 case 105: /* process * and ! in term */
1104 wrbuf_putc(term_dict, '(');
1105 if (!term_105(zm, &termp, term_dict, space_split, term_dst, 1))
1110 wrbuf_putc(term_dict, ')');
1112 case 106: /* process * and ! in term */
1113 wrbuf_putc(term_dict, '(');
1114 if (!term_105(zm, &termp, term_dict, space_split, term_dst, 0))
1119 wrbuf_putc(term_dict, ')');
1122 zebra_setError_zint(zh,
1123 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1130 const char *input = wrbuf_cstr(term_dict) + prefix_len;
1131 esc_str(buf, sizeof(buf), input, strlen(input));
1134 WRBUF pr_wr = wrbuf_alloc();
1136 wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1137 yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1138 wrbuf_destroy(pr_wr);
1140 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1141 grep_info, &max_pos,
1142 ord_len /* number of "exact" chars */,
1145 zebra_set_partial_result(zh);
1147 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1149 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Releases the two arrays owned by grep_info (term_no and isam_p_buf)
   with xfree().  The struct itself is not freed here. */
1155 static void grep_info_delete(struct grep_info *grep_info)
1158 xfree(grep_info->term_no);
1160 xfree(grep_info->isam_p_buf);
/* Initialises grep_info for a new search leaf: empty buffers, no
   termset, index_type recorded.
   trunc_max comes from the "truncmax" resource (default "10000") and
   is overridden by attribute type 13 when present.
   Attribute type 8 names a termset: a numeric value other than -2 is
   formatted with "%d", otherwise the string value is used; the set is
   created with resultSetAdd().  Failure to create it reports
   YAZ_BIB1_ILLEGAL_RESULT_SET_NAME. */
1163 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1164 Z_AttributesPlusTerm *zapt,
1165 struct grep_info *grep_info,
1166 const char *index_type)
1169 grep_info->term_no = 0;
1171 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1172 grep_info->isam_p_size = 0;
1173 grep_info->isam_p_buf = NULL;
1175 grep_info->index_type = index_type;
1176 grep_info->termset = 0;
1182 attr_init_APT(&truncmax, zapt, 13);
1183 truncmax_value = attr_find(&truncmax, NULL);
1184 if (truncmax_value != -1)
1185 grep_info->trunc_max = truncmax_value;
1190 int termset_value_numeric;
1191 const char *termset_value_string;
1193 attr_init_APT(&termset, zapt, 8);
1194 termset_value_numeric =
1195 attr_find_ex(&termset, NULL, &termset_value_string);
1196 if (termset_value_numeric != -1)
1199 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1203 const char *termset_name = 0;
1204 if (termset_value_numeric != -2)
/* NOTE(review): the declaration (and size) of resname is not visible
   here - confirm it can hold any formatted int. */
1207 sprintf(resname, "%d", termset_value_numeric);
1208 termset_name = resname;
1211 termset_name = termset_value_string;
1212 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1213 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1214 if (!grep_info->termset)
1216 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
/* Splits termz into terms and creates one result set per term with
   search_term().  *result_sets is an array allocated from stream and
   regrown (allocate, copy, replace) when full; *num_result_sets counts
   the entries filled in.  When search_term() fails, the sets created
   so far are deleted and grep_info is released. */
1225 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1226 Z_AttributesPlusTerm *zapt,
1228 const Odr_oid *attributeSet,
1230 const char *index_type, int complete_flag,
1231 const char *rank_type,
1232 const char *xpath_use,
1234 RSET **result_sets, int *num_result_sets,
1235 struct rset_key_control *kc,
1238 char term_dst[IT_MAX_WORD+1];
1239 struct grep_info grep_info;
1240 const char *termp = termz;
1243 *num_result_sets = 0;
1245 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1251 if (alloc_sets == *num_result_sets)
1254 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
/* NOTE(review): on the first growth *result_sets may still be NULL
   (callers in this file initialise it to 0); memcpy with a NULL source
   is undefined even for a zero length - confirm the guard on the
   missing line. */
1257 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1258 alloc_sets = alloc_sets + add;
1259 *result_sets = rnew;
1261 res = search_term(zh, zapt, &termp, attributeSet,
1263 index_type, complete_flag,
1264 term_dst, rank_type,
1265 xpath_use, rset_nmem,
1266 &(*result_sets)[*num_result_sets],
1268 if (res != ZEBRA_OK)
1271 for (i = 0; i < *num_result_sets; i++)
1272 rset_delete((*result_sets)[i]);
1273 grep_info_delete(&grep_info);
/* a null slot means search_term() produced no set (no more terms) */
1276 if ((*result_sets)[*num_result_sets] == 0)
1278 (*num_result_sets)++;
1283 grep_info_delete(&grep_info);
1288 \brief Create result set(s) for list of terms
1289 \param zh Zebra Handle
1290 \param zapt Attributes Plus Term (RPN leaf)
1291 \param termz term as used in query but converted to UTF-8
1292 \param attributeSet default attribute set
1293 \param stream memory for result
1294 \param index_type register type ("w", "p",..)
1295 \param complete_flag whether it's phrases or not
1296 \param rank_type term flags for ranking
1297 \param xpath_use use attribute for X-Path (-1 for no X-path)
1298 \param rset_nmem memory for result sets
1299 \param result_sets output result set for each term in list (output)
1300 \param num_result_sets number of output result sets
1301 \param kc rset key control to be used for created result sets
/* Looks up (or creates) the zebra map for index_type; for an ICU map
   the tokenizer is started on termz.  All remaining work is delegated
   to search_terms_chrmap(). */
1303 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1304 Z_AttributesPlusTerm *zapt,
1306 const Odr_oid *attributeSet,
1308 const char *index_type, int complete_flag,
1309 const char *rank_type,
1310 const char *xpath_use,
1312 RSET **result_sets, int *num_result_sets,
1313 struct rset_key_control *kc)
1315 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1316 if (zebra_maps_is_icu(zm))
1317 zebra_map_tokenize_start(zm, termz, strlen(termz));
1318 return search_terms_chrmap(zh, zapt, termz, attributeSet,
1319 stream, index_type, complete_flag,
1320 rank_type, xpath_use,
1321 rset_nmem, result_sets, num_result_sets,
/* Reads the position attribute (type 3).  For the supported case it
   looks up the dictionary entry made of the encoded index ordinal
   followed by FIRST_IN_FIELD_STR and creates an ISAM result set from
   it; the map must support first-in-field, otherwise
   YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE is reported.
   NOTE(review): term_dict is a fixed 100-byte buffer filled with
   memcpy + strcpy; the bound on ord_len is not visible here. */
1326 /** \brief limit a search by position - returns result set
1328 static ZEBRA_RES search_position(ZebraHandle zh,
1329 Z_AttributesPlusTerm *zapt,
1330 const Odr_oid *attributeSet,
1331 const char *index_type,
1334 struct rset_key_control *kc)
1340 char term_dict[100];
1344 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1346 attr_init_APT(&position, zapt, 3);
1347 position_value = attr_find(&position, NULL);
1348 switch(position_value)
1357 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1363 if (!zebra_maps_is_first_in_field(zm))
1365 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1370 if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1371 attributeSet, &ord) != ZEBRA_OK)
1375 ord_len = key_SU_encode(ord, ord_buf);
1376 memcpy(term_dict, ord_buf, ord_len);
1377 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1378 val = dict_lookup(zh->reg->dict, term_dict);
1381 assert(*val == sizeof(ISAM_P));
1382 memcpy(&isam_p, val+1, sizeof(isam_p));
1384 *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope,
/* Creates one result set per term with search_terms_list().  When
   there is at least one, search_position() is consulted; a set it
   returns (first_set) is placed in front of the others in a new array
   allocated from stream, and on its failure the term sets are deleted.
   Combination: no sets gives a null result set, one set is returned
   as is, several sets are joined by an ordered proximity set with
   relation 3 and distance 1. */
1390 /** \brief returns result set for phrase search
1392 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1393 Z_AttributesPlusTerm *zapt,
1394 const char *termz_org,
1395 const Odr_oid *attributeSet,
1397 const char *index_type,
1399 const char *rank_type,
1400 const char *xpath_use,
1403 struct rset_key_control *kc)
1405 RSET *result_sets = 0;
1406 int num_result_sets = 0;
1408 search_terms_list(zh, zapt, termz_org, attributeSet,
1409 stream, index_type, complete_flag,
1410 rank_type, xpath_use,
1412 &result_sets, &num_result_sets, kc);
1414 if (res != ZEBRA_OK)
1417 if (num_result_sets > 0)
1420 res = search_position(zh, zapt, attributeSet,
1422 rset_nmem, &first_set,
1424 if (res != ZEBRA_OK)
1427 for (i = 0; i<num_result_sets; i++)
1428 rset_delete(result_sets[i]);
1433 RSET *nsets = nmem_malloc(stream,
1434 sizeof(RSET) * (num_result_sets+1));
1435 nsets[0] = first_set;
1436 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1437 result_sets = nsets;
1441 if (num_result_sets == 0)
1442 *rset = rset_create_null(rset_nmem, kc, 0);
1443 else if (num_result_sets == 1)
1444 *rset = result_sets[0];
1446 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1447 num_result_sets, result_sets,
1448 1 /* ordered */, 0 /* exclusion */,
1449 3 /* relation */, 1 /* distance */);
/* Creates one result set per term with search_terms_list().  Each
   term set is then handled together with search_position(): a
   position set (first_set) is paired with the term set in tmp_set and
   replaced by an ordered proximity set (relation 3, distance 1); on
   failure all term sets are deleted.
   Combination: no sets gives a null result set, one set is returned
   as is, several sets are joined with rset_create_or(). */
1455 /** \brief returns result set for or-list search
1457 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1458 Z_AttributesPlusTerm *zapt,
1459 const char *termz_org,
1460 const Odr_oid *attributeSet,
1462 const char *index_type,
1464 const char *rank_type,
1465 const char *xpath_use,
1468 struct rset_key_control *kc)
1470 RSET *result_sets = 0;
1471 int num_result_sets = 0;
1474 search_terms_list(zh, zapt, termz_org, attributeSet,
1475 stream, index_type, complete_flag,
1476 rank_type, xpath_use,
1478 &result_sets, &num_result_sets, kc);
1479 if (res != ZEBRA_OK)
1482 for (i = 0; i<num_result_sets; i++)
1485 res = search_position(zh, zapt, attributeSet,
1487 rset_nmem, &first_set,
1489 if (res != ZEBRA_OK)
1491 for (i = 0; i<num_result_sets; i++)
1492 rset_delete(result_sets[i]);
1500 tmp_set[0] = first_set;
1501 tmp_set[1] = result_sets[i];
1503 result_sets[i] = rset_create_prox(
1504 rset_nmem, kc, kc->scope,
1506 1 /* ordered */, 0 /* exclusion */,
1507 3 /* relation */, 1 /* distance */);
1510 if (num_result_sets == 0)
1511 *rset = rset_create_null(rset_nmem, kc, 0);
1512 else if (num_result_sets == 1)
1513 *rset = result_sets[0];
1515 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1516 num_result_sets, result_sets);
/* Same structure as rpn_search_APT_or_list(): per-term result sets
   from search_terms_list(), each optionally paired with the position
   set from search_position() through an ordered proximity set
   (relation 3, distance 1), term sets deleted on failure.
   Combination: no sets gives a null result set, one set is returned
   as is, several sets are joined with rset_create_and(). */
1522 /** \brief returns result set for and-list search
1524 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1525 Z_AttributesPlusTerm *zapt,
1526 const char *termz_org,
1527 const Odr_oid *attributeSet,
1529 const char *index_type,
1531 const char *rank_type,
1532 const char *xpath_use,
1535 struct rset_key_control *kc)
1537 RSET *result_sets = 0;
1538 int num_result_sets = 0;
1541 search_terms_list(zh, zapt, termz_org, attributeSet,
1542 stream, index_type, complete_flag,
1543 rank_type, xpath_use,
1545 &result_sets, &num_result_sets,
1547 if (res != ZEBRA_OK)
1549 for (i = 0; i<num_result_sets; i++)
1552 res = search_position(zh, zapt, attributeSet,
1554 rset_nmem, &first_set,
1556 if (res != ZEBRA_OK)
1558 for (i = 0; i<num_result_sets; i++)
1559 rset_delete(result_sets[i]);
1567 tmp_set[0] = first_set;
1568 tmp_set[1] = result_sets[i];
1570 result_sets[i] = rset_create_prox(
1571 rset_nmem, kc, kc->scope,
1573 1 /* ordered */, 0 /* exclusion */,
1574 3 /* relation */, 1 /* distance */);
1579 if (num_result_sets == 0)
1580 *rset = rset_create_null(rset_nmem, kc, 0);
1581 else if (num_result_sets == 1)
1582 *rset = result_sets[0];
1584 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1585 num_result_sets, result_sets);
/* Numeric counterpart of string_relation(): reads the relation
   attribute (type 2), expands the term with term_100() into the
   scratch WRBUF term_num, converts it with atoi() and appends a
   matching regular expression to term_dict - gen_regular_rel() for
   the ordering relations (strict ones shift the border by one),
   "(0*%d)" for equality.  The expression is then run through
   dict_lookup_grep() with grep_handle as callback.  An unsupported
   relation stores YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE in *error_code.
   term_num is destroyed on every visible exit path.
   NOTE(review): atoi() gives no error indication for non-numeric or
   out-of-range terms. */
1591 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1592 const char **term_sub,
1594 const Odr_oid *attributeSet,
1595 struct grep_info *grep_info,
1605 WRBUF term_num = wrbuf_alloc();
1608 attr_init_APT(&relation, zapt, 2);
1609 relation_value = attr_find(&relation, NULL);
1611 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1613 switch (relation_value)
1616 yaz_log(log_level_rpn, "Relation <");
1617 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1619 wrbuf_destroy(term_num);
1622 term_value = atoi(wrbuf_cstr(term_num));
/* strict "<" is expressed as "<= value-1" */
1623 gen_regular_rel(term_dict, term_value-1, 1);
1626 yaz_log(log_level_rpn, "Relation <=");
1627 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1629 wrbuf_destroy(term_num);
1632 term_value = atoi(wrbuf_cstr(term_num));
1633 gen_regular_rel(term_dict, term_value, 1);
1636 yaz_log(log_level_rpn, "Relation >=");
1637 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1639 wrbuf_destroy(term_num);
1642 term_value = atoi(wrbuf_cstr(term_num));
1643 gen_regular_rel(term_dict, term_value, 0);
1646 yaz_log(log_level_rpn, "Relation >");
1647 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1649 wrbuf_destroy(term_num);
1652 term_value = atoi(wrbuf_cstr(term_num));
/* strict ">" is expressed as ">= value+1" */
1653 gen_regular_rel(term_dict, term_value+1, 0);
1657 yaz_log(log_level_rpn, "Relation =");
1658 if (!term_100(zm, term_sub, term_num, 1, term_dst))
1660 wrbuf_destroy(term_num);
1663 term_value = atoi(wrbuf_cstr(term_num));
/* equality: the number with any count of leading zeros */
1664 wrbuf_printf(term_dict, "(0*%d)", term_value);
1667 /* term_tmp untouched.. */
1668 while (**term_sub != '\0')
1672 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1673 wrbuf_destroy(term_num);
1676 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1677 0, grep_info, max_pos, 0, grep_handle);
1680 zebra_set_partial_result(zh);
/* NOTE(review): this message says "rel = gt" although the lookup is
   shared by all relations. */
1682 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1683 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1684 wrbuf_destroy(term_num);
/* Numeric counterpart of string_term(): resolves the index ordinal,
   writes it to term_dict as a parenthesised prefix with each byte
   preceded by the escape byte 1, and lets numeric_relation() append
   the value expression and run the dictionary lookup.  ol receives an
   ord_list holding the ordinal.  A relation error is reported with
   zebra_setError(). */
1688 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1689 const char **term_sub,
1691 const Odr_oid *attributeSet, NMEM stream,
1692 struct grep_info *grep_info,
1693 const char *index_type, int complete_flag,
1695 const char *xpath_use,
1696 struct ord_list **ol)
1699 struct rpn_char_map_info rcmi;
1701 int relation_error = 0;
1702 int ord, ord_len, i;
1704 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1706 *ol = ord_list_create(stream);
1708 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1712 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1713 attributeSet, &ord) != ZEBRA_OK)
1718 wrbuf_rewind(term_dict);
1720 *ol = ord_list_append(stream, *ol, ord);
1722 ord_len = key_SU_encode(ord, ord_buf);
1724 wrbuf_putc(term_dict, '(');
1725 for (i = 0; i < ord_len; i++)
1727 wrbuf_putc(term_dict, 1);
1728 wrbuf_putc(term_dict, ord_buf[i]);
1730 wrbuf_putc(term_dict, ')');
1732 if (!numeric_relation(zh, zapt, &termp, term_dict,
1733 attributeSet, grep_info, &max_pos, zm,
1734 term_dst, &relation_error))
1738 zebra_setError(zh, relation_error, 0);
1745 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Creates a result set for a numeric leaf.  Each term in termz is
   expanded with numeric_term() (using a scratch WRBUF allocated and
   destroyed per term) and turned into a result set with rset_trunc()
   (positions not preserved).  The array of sets is allocated from
   stream and regrown when full.
   Combination: no sets gives a null result set, one set is returned
   as is, several sets are joined with rset_create_and(). */
1750 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1751 Z_AttributesPlusTerm *zapt,
1753 const Odr_oid *attributeSet,
1755 const char *index_type,
1757 const char *rank_type,
1758 const char *xpath_use,
1761 struct rset_key_control *kc)
1763 char term_dst[IT_MAX_WORD+1];
1764 const char *termp = termz;
1765 RSET *result_sets = 0;
1766 int num_result_sets = 0;
1768 struct grep_info grep_info;
1770 zint hits_limit_value;
1771 const char *term_ref_id_str = 0;
1773 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1776 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1777 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1781 struct ord_list *ol;
1782 WRBUF term_dict = wrbuf_alloc();
1783 if (alloc_sets == num_result_sets)
1786 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
/* NOTE(review): result_sets starts as 0, so the first growth may pass
   a NULL source to memcpy; that is undefined even for a zero length -
   confirm the guard on the missing line. */
1789 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1790 alloc_sets = alloc_sets + add;
1793 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1794 grep_info.isam_p_indx = 0;
1795 res = numeric_term(zh, zapt, &termp, term_dict,
1796 attributeSet, stream, &grep_info,
1797 index_type, complete_flag,
1798 term_dst, xpath_use, &ol);
1799 wrbuf_destroy(term_dict);
1800 if (res == ZEBRA_FAIL || termp == 0)
1802 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1803 result_sets[num_result_sets] =
1804 rset_trunc(zh, grep_info.isam_p_buf,
1805 grep_info.isam_p_indx, term_dst,
1806 strlen(term_dst), rank_type,
1807 0 /* preserve position */,
1808 zapt->term->which, rset_nmem,
1809 kc, kc->scope, ol, index_type,
1812 if (!result_sets[num_result_sets])
1818 grep_info_delete(&grep_info);
1820 if (res != ZEBRA_OK)
1822 if (num_result_sets == 0)
1823 *rset = rset_create_null(rset_nmem, kc, 0);
1824 else if (num_result_sets == 1)
1825 *rset = result_sets[0];
1827 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1828 num_result_sets, result_sets);
/* Search by local record number (used for search type "local").

   termz is converted to a system number with atozint() and the record
   is fetched with rec_get().  Two outcomes are visible: *rset becomes
   a null result set, or it becomes a temporary result set (created
   with the "setTmpDir" resource value) that is opened for writing and
   receives a key via rset_write().
   NOTE(review): presumably the null set is the "record not found"
   case - confirm. */
1834 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1835 Z_AttributesPlusTerm *zapt,
1837 const Odr_oid *attributeSet,
1839 const char *rank_type, NMEM rset_nmem,
1841 struct rset_key_control *kc)
1844 zint sysno = atozint(termz);
1848 rec = rec_get(zh->reg->records, sysno);
1856 *rset = rset_create_null(rset_nmem, kc, 0);
1862 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1863 res_get(zh->res, "setTmpDir"), 0);
1864 rsfd = rset_open(*rset, RSETF_WRITE);
1869 rset_write(rsfd, &key);
/* Record a sort key specification carried by an APT; no index search
   is done here.

   The term is checked for being Z_Term_general and is parsed with
   atoi_n() into the slot index i of sort_sequence->specs.  The specs
   array is created with 10 zeroed slots the first time it is needed.
   A new Z_SortKeySpec is built in stream memory: a generic sort
   element whose key is a Z_SortAttributes made of a copy of
   attributeSet and zapt->attributes, caseSensitivity 0 and
   which = Z_SortKeySpec_null.  The sort relation is read from
   attribute type 7: value 2 selects descending, value 1 and every
   other value select ascending.
   The spec is stored in slot i and *rset is set to a null result
   set. */
1875 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1876 const Odr_oid *attributeSet, NMEM stream,
1877 Z_SortKeySpecList *sort_sequence,
1878 const char *rank_type,
1881 struct rset_key_control *kc)
1884 int sort_relation_value;
1885 AttrType sort_relation_type;
1890 attr_init_APT(&sort_relation_type, zapt, 7);
1891 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* lazily create the table of sort specs, all slots empty */
1893 if (!sort_sequence->specs)
1895 sort_sequence->num_specs = 10;
1896 sort_sequence->specs = (Z_SortKeySpec **)
1897 nmem_malloc(stream, sort_sequence->num_specs *
1898 sizeof(*sort_sequence->specs));
1899 for (i = 0; i<sort_sequence->num_specs; i++)
1900 sort_sequence->specs[i] = 0;
1902 if (zapt->term->which != Z_Term_general)
1905 i = atoi_n((char *) zapt->term->u.general->buf,
1906 zapt->term->u.general->len);
/* NOTE(review): only the upper bound of i is checked here; confirm
   that atoi_n() cannot yield a negative index for specs[i] below */
1907 if (i >= sort_sequence->num_specs)
1909 sprintf(termz, "%d", i);
1911 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1912 sks->sortElement = (Z_SortElement *)
1913 nmem_malloc(stream, sizeof(*sks->sortElement));
1914 sks->sortElement->which = Z_SortElement_generic;
1915 sk = sks->sortElement->u.generic = (Z_SortKey *)
1916 nmem_malloc(stream, sizeof(*sk));
1917 sk->which = Z_SortKey_sortAttributes;
1918 sk->u.sortAttributes = (Z_SortAttributes *)
1919 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1921 sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1922 sk->u.sortAttributes->list = zapt->attributes;
1924 sks->sortRelation = (int *)
1925 nmem_malloc(stream, sizeof(*sks->sortRelation));
1926 if (sort_relation_value == 1)
1927 *sks->sortRelation = Z_SortKeySpec_ascending;
1928 else if (sort_relation_value == 2)
1929 *sks->sortRelation = Z_SortKeySpec_descending;
1931 *sks->sortRelation = Z_SortKeySpec_ascending;
1933 sks->caseSensitivity = (int *)
1934 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1935 *sks->caseSensitivity = 0;
1937 sks->which = Z_SortKeySpec_null;
1938 sks->u.null = odr_nullval ();
1939 sort_sequence->specs[i] = sks;
1940 *rset = rset_create_null(rset_nmem, kc, 0);
/* Decide whether the use attribute (type 1) of zapt is an X-Path
   expression and, if so, parse it.

   The string value of the use attribute is fetched with
   attr_find_ex().  A missing string, or one that does not begin with
   '/', takes the early-exit branch; otherwise the string is parsed by
   zebra_parse_xpath_str() into xpath[] (capacity max, memory from mem)
   and that function's return value is returned. */
1945 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1946 const Odr_oid *attributeSet,
1947 struct xpath_location_step *xpath, int max,
1950 const Odr_oid *curAttributeSet = attributeSet;
1952 const char *use_string = 0;
1954 attr_init_APT(&use, zapt, 1);
1955 attr_find_ex(&use, &curAttributeSet, &use_string);
1957 if (!use_string || *use_string != '/')
1960 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/* Look up 'term' as a grep expression in the index identified by
   (index_type, xpath_use) and return the matches as a result set.

   The ordinal comes from zebraExplain_lookup_attr_str().  A null
   result set is returned when grep_info_prepare() fails; the second
   null-set return is presumably for an unknown ordinal - confirm.
   The dictionary expression is '(' + the key_SU_encode()d ordinal
   bytes, each preceded by the byte value 1, + ')' + term.
   rset_trunc() is called with "void" in the rank_type position, 1 in
   the preserve-position argument, term type Z_Term_characterString, no
   ord list, no hits limit and no term reference id.  grep_info and
   term_dict are released afterwards. */
1965 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1966 const char *index_type, const char *term,
1967 const char *xpath_use,
1969 struct rset_key_control *kc)
1971 struct grep_info grep_info;
1972 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1973 zinfo_index_category_index,
1974 index_type, xpath_use);
1975 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
1976 return rset_create_null(rset_nmem, kc, 0);
1979 return rset_create_null(rset_nmem, kc, 0);
1985 WRBUF term_dict = wrbuf_alloc();
1986 int ord_len = key_SU_encode(ord, ord_buf);
1987 int term_type = Z_Term_characterString;
1988 const char *flags = "void";
1990 wrbuf_putc(term_dict, '(');
1991 for (i = 0; i<ord_len; i++)
1993 wrbuf_putc(term_dict, 1);
1994 wrbuf_putc(term_dict, ord_buf[i]);
1996 wrbuf_putc(term_dict, ')');
1997 wrbuf_puts(term_dict, term);
1999 grep_info.isam_p_indx = 0;
2000 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2001 &grep_info, &max_pos, 0, grep_handle);
2002 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2003 grep_info.isam_p_indx);
2004 rset = rset_trunc(zh, grep_info.isam_p_buf,
2005 grep_info.isam_p_indx, term, strlen(term),
2006 flags, 1, term_type, rset_nmem,
2007 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2008 0 /* term_ref_id_str */);
2009 grep_info_delete(&grep_info);
2010 wrbuf_destroy(term_dict);
/* Restrict a term result set to the elements selected by an X-Path.

   rset is the term result set; when it is 0, always_matches is set to
   1.  The dictionary character map is cleared with
   dict_grep_cmap(dict, 0, 0) before the lookups.
   Levels are visited from xpath_len-1 down to 0.  For each level:
   - xpath_rev is built from steps level..1 (highest step first); the
     characters of a step part are copied, with "[^/]*" and "\001 "
     written in place of some of them, and "/" is appended after the
     part.  The alternative branch for step 1 appends ".*" (the "//"
     case).
   - if the step has a relation predicate with a non-empty name, a
     lookup term is built from the name without its first character,
     followed (when a value exists) by '=' and the value with every
     character found in REGEX_CHARS backslash-escaped; xpath_trunc()
     on ZEBRA_XPATH_ATTR_NAME gives rset_attr.
   - when xpath_rev is non-empty, the begin-tag and end-tag sets are
     fetched with xpath_trunc() (ZEBRA_XPATH_ELM_BEGIN / _END) and rset
     becomes rset_create_between(start, rset, end, rset_attr).
   NOTE(review): the assignment rset = rset_start_tag is presumably the
   always_matches case - confirm. */
2016 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2017 NMEM stream, const char *rank_type, RSET rset,
2018 int xpath_len, struct xpath_location_step *xpath,
2021 struct rset_key_control *kc)
2024 int always_matches = rset ? 0 : 1;
2032 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2033 for (i = 0; i<xpath_len; i++)
2035 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2047 a[@attr = value]/b[@other = othervalue]
2049 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2050 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2051 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2052 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2053 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2054 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2058 dict_grep_cmap(zh->reg->dict, 0, 0);
2061 int level = xpath_len;
2064 while (--level >= 0)
2066 WRBUF xpath_rev = wrbuf_alloc();
2068 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2070 for (i = level; i >= 1; --i)
2072 const char *cp = xpath[i].part;
2078 wrbuf_puts(xpath_rev, "[^/]*");
2079 else if (*cp == ' ')
2080 wrbuf_puts(xpath_rev, "\001 ");
2082 wrbuf_putc(xpath_rev, *cp);
2084 /* wrbuf_putc does not null-terminate , but
2085 wrbuf_puts below ensures it does.. so xpath_rev
2086 is OK iff length is > 0 */
2088 wrbuf_puts(xpath_rev, "/");
2090 else if (i == 1) /* // case */
2091 wrbuf_puts(xpath_rev, ".*");
2093 if (xpath[level].predicate &&
2094 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2095 xpath[level].predicate->u.relation.name[0])
2097 WRBUF wbuf = wrbuf_alloc();
2098 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2099 if (xpath[level].predicate->u.relation.value)
2101 const char *cp = xpath[level].predicate->u.relation.value;
2102 wrbuf_putc(wbuf, '=');
2106 if (strchr(REGEX_CHARS, *cp))
2107 wrbuf_putc(wbuf, '\\');
2108 wrbuf_putc(wbuf, *cp);
2112 rset_attr = xpath_trunc(
2113 zh, stream, "0", wrbuf_cstr(wbuf),
2114 ZEBRA_XPATH_ATTR_NAME,
2116 wrbuf_destroy(wbuf);
2122 wrbuf_destroy(xpath_rev);
2126 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2127 wrbuf_cstr(xpath_rev));
2128 if (wrbuf_len(xpath_rev))
2130 rset_start_tag = xpath_trunc(zh, stream, "0",
2131 wrbuf_cstr(xpath_rev),
2132 ZEBRA_XPATH_ELM_BEGIN,
2135 rset = rset_start_tag;
2138 rset_end_tag = xpath_trunc(zh, stream, "0",
2139 wrbuf_cstr(xpath_rev),
2140 ZEBRA_XPATH_ELM_END,
2143 rset = rset_create_between(rset_nmem, kc, kc->scope,
2144 rset_start_tag, rset,
2145 rset_end_tag, rset_attr);
2148 wrbuf_destroy(xpath_rev);
/* Capacity of the xpath[] step array declared in rpn_search_database()
   and the 'max' value it passes to rpn_check_xpath(). */
2156 #define MAX_XPATH_STEPS 10

/* Forward declaration: rpn_search_APT() calls rpn_search_database()
   once per database before the definition appears. */
2158 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2159 Z_AttributesPlusTerm *zapt,
2160 const Odr_oid *attributeSet, NMEM stream,
2161 Z_SortKeySpecList *sort_sequence,
2164 struct rset_key_control *kc);
/* Search one APT across all requested databases.

   An array of num_bases result sets is allocated from stream.  For
   each entry of basenames the database is made current with
   zebraExplain_curDatabase(); a non-zero return from that call leads
   to YAZ_BIB1_DATABASE_UNAVAILABLE being set.  Otherwise
   rpn_search_database() fills rsets[i].
   On failure the result sets created so far are deleted.  On success
   *rset is a null set when num_bases is 0, or rset_create_or() over
   the per-database sets (one more success branch precedes the
   num_bases == 0 test; presumably it covers a single database -
   confirm). */
2166 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2167 const Odr_oid *attributeSet, NMEM stream,
2168 Z_SortKeySpecList *sort_sequence,
2169 int num_bases, const char **basenames,
2172 struct rset_key_control *kc)
2174 RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2175 ZEBRA_RES res = ZEBRA_OK;
2177 for (i = 0; i < num_bases; i++)
2180 if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2182 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2187 res = rpn_search_database(zh, zapt, attributeSet, stream,
2189 rset_nmem, rsets+i, kc);
2190 if (res != ZEBRA_OK)
2193 if (res != ZEBRA_OK)
2194 { /* must clean up the already created sets */
2196 rset_delete(rsets[i]);
2203 else if (num_bases == 0)
2204 *rset = rset_create_null(rset_nmem, kc, 0);
2206 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
/* Search a single database for one APT; rpn_search_APT() selects the
   database with zebraExplain_curDatabase() before calling this.

   Steps visible in this function:
   1. zebra_maps_attr() derives index_type, search_type, rank_type,
      complete_flag and sort_flag from the APT.
   2. The term is converted to UTF-8 into termz by zapt_term_to_utf8().
   3. One branch hands the APT to rpn_sort_spec() and returns its
      result (presumably when sort_flag is set - confirm).
   4. rpn_check_xpath() parses an X-Path use attribute into xpath[];
      xpath_use is ZEBRA_XPATH_ATTR_CDATA when the last step's part
      starts with '@', otherwise ZEBRA_XPATH_CDATA.  If relation
      attribute (type 2) has value 103, *rset is set to 0 and
      rpn_search_xpath() is returned directly.
   5. search_type selects the strategy: "phrase", "and-list",
      "or-list", "local" or "numeric"; any other value sets
      YAZ_BIB1_UNSUPP_SEARCH.
   6. The final return passes the strategy's result set through
      rpn_search_xpath(). */
2212 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2213 Z_AttributesPlusTerm *zapt,
2214 const Odr_oid *attributeSet, NMEM stream,
2215 Z_SortKeySpecList *sort_sequence,
2218 struct rset_key_control *kc)
2220 ZEBRA_RES res = ZEBRA_OK;
2221 const char *index_type;
2222 char *search_type = NULL;
2223 char rank_type[128];
2226 char termz[IT_MAX_WORD+1];
2228 const char *xpath_use = 0;
2229 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2233 log_level_rpn = yaz_log_module_level("rpn");
2236 zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2237 rank_type, &complete_flag, &sort_flag);
2239 yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2240 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2241 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2242 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2244 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2248 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2249 rank_type, rset_nmem, rset, kc);
2250 /* consider if an X-Path query is used */
2251 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2252 xpath, MAX_XPATH_STEPS, stream);
2255 if (xpath[xpath_len-1].part[0] == '@')
2256 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2258 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
2265 attr_init_APT(&relation, zapt, 2);
2266 relation_value = attr_find(&relation, NULL);
2268 if (relation_value == 103) /* alwaysmatches */
2270 *rset = 0; /* signal no "term" set */
2271 return rpn_search_xpath(zh, stream, rank_type, *rset,
2272 xpath_len, xpath, rset_nmem, rset, kc);
2277 /* search using one of the various search type strategies
2278 termz is our UTF-8 search term
2279 attributeSet is top-level default attribute set
2280 stream is ODR for search
2281 reg_id is the register type
2282 complete_flag is 1 for complete subfield, 0 for incomplete
2283 xpath_use is use-attribute to be used for X-Path search, 0 for none
2285 if (!strcmp(search_type, "phrase"))
2287 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2288 index_type, complete_flag, rank_type,
2293 else if (!strcmp(search_type, "and-list"))
2295 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2296 index_type, complete_flag, rank_type,
2301 else if (!strcmp(search_type, "or-list"))
2303 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2304 index_type, complete_flag, rank_type,
2309 else if (!strcmp(search_type, "local"))
2311 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2312 rank_type, rset_nmem, rset, kc);
2314 else if (!strcmp(search_type, "numeric"))
2316 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2317 index_type, complete_flag, rank_type,
2324 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2327 if (res != ZEBRA_OK)
2331 return rpn_search_xpath(zh, stream, rank_type, *rset,
2332 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration: rpn_search_structure() is called by
   rpn_search_top() and by itself before its definition appears.
   result_sets / num_result_sets return the list of result sets
   produced for the node zs; parent_op is the operator of the enclosing
   complex node, or 0 at the top. */
2335 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2336 const Odr_oid *attributeSet,
2337 NMEM stream, NMEM rset_nmem,
2338 Z_SortKeySpecList *sort_sequence,
2339 int num_bases, const char **basenames,
2340 RSET **result_sets, int *num_result_sets,
2341 Z_Operator *parent_op,
2342 struct rset_key_control *kc);
/* Walk an RPN query tree and read attribute type 12 from its APT
   operands.

   A complex node recurses into s1 and then into s2; s2 is only visited
   while the result is still ZEBRA_OK.  For a simple node that is an
   APT operand, attribute type 12 is looked up with attr_find() and its
   value stored in l.
   NOTE(review): going by the variable name, type 12 is presumably a
   global hits limit - confirm against the attribute documentation. */
2344 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2347 ZEBRA_RES res = ZEBRA_OK;
2348 if (zs->which == Z_RPNStructure_complex)
2350 if (res == ZEBRA_OK)
2351 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2353 if (res == ZEBRA_OK)
2354 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2357 else if (zs->which == Z_RPNStructure_simple)
2359 if (zs->u.simple->which == Z_Operand_APT)
2361 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2362 AttrType global_hits_limit_attr;
2365 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2367 l = attr_find(&global_hits_limit_attr, NULL);
/* Entry point for evaluating a complete RPN query tree.

   A key control block is created with zebra_key_control_create() and
   the tree is evaluated by rpn_search_structure() with no parent
   operator.  If that fails, every result set it handed back is
   deleted.  Otherwise exactly one non-null result set is expected
   (enforced with assert) and it is stored in *result_set. */
2375 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2376 const Odr_oid *attributeSet,
2377 NMEM stream, NMEM rset_nmem,
2378 Z_SortKeySpecList *sort_sequence,
2379 int num_bases, const char **basenames,
2382 RSET *result_sets = 0;
2383 int num_result_sets = 0;
2385 struct rset_key_control *kc = zebra_key_control_create(zh);
2387 res = rpn_search_structure(zh, zs, attributeSet,
2390 num_bases, basenames,
2391 &result_sets, &num_result_sets,
2392 0 /* no parent op */,
2394 if (res != ZEBRA_OK)
2397 for (i = 0; i<num_result_sets; i++)
2398 rset_delete(result_sets[i]);
2403 assert(num_result_sets == 1);
2404 assert(result_sets);
2405 assert(*result_sets);
2406 *result_set = *result_sets;
/* Recursively evaluate one node of an RPN query tree.

   Output: *result_sets / *num_result_sets, the list of result sets for
   this node (allocated from stream).

   Complex node: the left operand (s1) is evaluated, then the right
   (s2).  If the left fails its sets are deleted; if the right fails
   the sets of both sides are deleted.  The two lists are concatenated.
   They are combined into a single set right away when there is no
   parent operator, when the parent operator differs from this one, or
   when this operator is neither AND nor OR; otherwise the concatenated
   list is left as it is.
   Combination by operator:
     and      rset_create_and()
     or       rset_create_or()
     and-not  rset_create_not()
     prox     rset_create_prox(), after checking that the unit is of
              the "known" kind and equals Z_ProxUnit_word; failures set
              YAZ_BIB1_UNSUPP_PROX_UNIT_CODE
     other    YAZ_BIB1_OPERATOR_UNSUPP

   Simple node: an APT operand is searched with rpn_search_APT(); a
   result set id is resolved with resultSetRef(), with
   YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST as the error code used
   on that path; any other operand sets YAZ_BIB1_UNSUPP_SEARCH.  The
   single set is returned as a one-element list.

   Any other node kind sets YAZ_BIB1_UNSUPP_SEARCH. */
2412 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2413 const Odr_oid *attributeSet,
2414 NMEM stream, NMEM rset_nmem,
2415 Z_SortKeySpecList *sort_sequence,
2416 int num_bases, const char **basenames,
2417 RSET **result_sets, int *num_result_sets,
2418 Z_Operator *parent_op,
2419 struct rset_key_control *kc)
2421 *num_result_sets = 0;
2422 if (zs->which == Z_RPNStructure_complex)
2425 Z_Operator *zop = zs->u.complex->roperator;
2426 RSET *result_sets_l = 0;
2427 int num_result_sets_l = 0;
2428 RSET *result_sets_r = 0;
2429 int num_result_sets_r = 0;
/* left operand */
2431 res = rpn_search_structure(zh, zs->u.complex->s1,
2432 attributeSet, stream, rset_nmem,
2434 num_bases, basenames,
2435 &result_sets_l, &num_result_sets_l,
2437 if (res != ZEBRA_OK)
2440 for (i = 0; i<num_result_sets_l; i++)
2441 rset_delete(result_sets_l[i]);
/* right operand */
2444 res = rpn_search_structure(zh, zs->u.complex->s2,
2445 attributeSet, stream, rset_nmem,
2447 num_bases, basenames,
2448 &result_sets_r, &num_result_sets_r,
2450 if (res != ZEBRA_OK)
2453 for (i = 0; i<num_result_sets_l; i++)
2454 rset_delete(result_sets_l[i]);
2455 for (i = 0; i<num_result_sets_r; i++)
2456 rset_delete(result_sets_r[i]);
2460 /* make a new list of result for all children */
2461 *num_result_sets = num_result_sets_l + num_result_sets_r;
2462 *result_sets = nmem_malloc(stream, *num_result_sets *
2463 sizeof(**result_sets));
2464 memcpy(*result_sets, result_sets_l,
2465 num_result_sets_l * sizeof(**result_sets));
2466 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2467 num_result_sets_r * sizeof(**result_sets));
2469 if (!parent_op || parent_op->which != zop->which
2470 || (zop->which != Z_Operator_and &&
2471 zop->which != Z_Operator_or))
2473 /* parent node different from this one (or non-present) */
2474 /* we must combine result sets now */
2478 case Z_Operator_and:
2479 rset = rset_create_and(rset_nmem, kc,
2481 *num_result_sets, *result_sets);
2484 rset = rset_create_or(rset_nmem, kc,
2485 kc->scope, 0, /* termid */
2486 *num_result_sets, *result_sets);
2488 case Z_Operator_and_not:
2489 rset = rset_create_not(rset_nmem, kc,
2494 case Z_Operator_prox:
2495 if (zop->u.prox->which != Z_ProximityOperator_known)
2498 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2502 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2504 zebra_setError_zint(zh,
2505 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2506 *zop->u.prox->u.known);
2511 rset = rset_create_prox(rset_nmem, kc,
2513 *num_result_sets, *result_sets,
2514 *zop->u.prox->ordered,
2515 (!zop->u.prox->exclusion ?
2516 0 : *zop->u.prox->exclusion),
2517 *zop->u.prox->relationType,
2518 *zop->u.prox->distance );
2522 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* the combined set replaces the concatenated list */
2525 *num_result_sets = 1;
2526 *result_sets = nmem_malloc(stream, *num_result_sets *
2527 sizeof(**result_sets));
2528 (*result_sets)[0] = rset;
2531 else if (zs->which == Z_RPNStructure_simple)
2536 if (zs->u.simple->which == Z_Operand_APT)
2538 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2539 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2540 attributeSet, stream, sort_sequence,
2541 num_bases, basenames, rset_nmem, &rset,
2543 if (res != ZEBRA_OK)
2546 else if (zs->u.simple->which == Z_Operand_resultSetId)
2548 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2549 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2553 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2554 zs->u.simple->u.resultSetId);
2561 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2564 *num_result_sets = 1;
2565 *result_sets = nmem_malloc(stream, *num_result_sets *
2566 sizeof(**result_sets));
2567 (*result_sets)[0] = rset;
2571 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2582 * indent-tabs-mode: nil
2584 * vim: shiftwidth=4 tabstop=8 expandtab