1 /* This file is part of the Zebra server.
2 Copyright (C) 1995-2008 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
version.
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
30 #include <yaz/diagbib1.h>
32 #include <zebra_xpath.h>
37 static int log_level_set = 0;
38 static int log_level_rpn = 0;
40 #define TERMSET_DISABLE 1
/* Character-map callback handed to dict_grep_cmap() by rpn_char_map_prepare().
 * vp is cast to struct rpn_char_map_info; the input at *from (len bytes) is
 * run through zebra_maps_input() for p->zm.
 * The yaz_log() calls dump the mapped output as hex bytes.
 * NOTE(review): this listing is missing lines here (braces, the return and
 * whatever guards the dump), so the exact control flow must be confirmed. */
42 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
44 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
45 const char **out = zebra_maps_input(p->zm, from, len, 0);
49 const char *outp = *out;
50 yaz_log(YLOG_LOG, "---");
53 yaz_log(YLOG_LOG, "%02X", *outp);
/* Configures the character-map handler used by dictionary grep on reg->dict.
 * For ICU maps the handler is cleared (both arguments 0); the other visible
 * call installs rpn_char_map_handler with map_info as its user data.
 * NOTE(review): the lines between the signature and the calls (presumably
 * the map_info setup and an else) are missing from this listing. */
61 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
62 struct rpn_char_map_info *map_info)
65 if (zebra_maps_is_icu(zm))
66 dict_grep_cmap(reg->dict, 0, 0);
68 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
82 const char *index_type;
/* Records one dictionary hit in the grep_info accumulator p.
 * name is the dictionary key (an ordinal prefix decodable with
 * key_SU_decode(), followed by the term); info is the dictionary payload,
 * whose first byte holds the payload size and whose remaining bytes are the
 * ISAM_P value (see the assert and memcpy below).
 * The ISAM_P is appended to p->isam_p_buf, which is grown on demand, and the
 * term is then translated back and logged / added to p->termset.
 * NOTE(review): return statements, braces and any preprocessor guards are
 * missing from this listing. */
86 static int add_isam_p(const char *name, const char *info,
91 log_level_rpn = yaz_log_module_level("rpn");
94 /* we may have to stop this madness.. NOTE: -1 so that if
95 truncmax == trunxlimit we do *not* generate result sets */
96 if (p->isam_p_indx >= p->trunc_max - 1)
/* buffer full: grow to 2*size + 100 entries and copy the used part over */
99 if (p->isam_p_indx == p->isam_p_size)
101 ISAM_P *new_isam_p_buf;
105 p->isam_p_size = 2*p->isam_p_size + 100;
106 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
110 memcpy(new_isam_p_buf, p->isam_p_buf,
111 p->isam_p_indx * sizeof(*p->isam_p_buf));
112 xfree(p->isam_p_buf);
114 p->isam_p_buf = new_isam_p_buf;
117 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* NOTE(review): the source of this copy is p->isam_p_buf, not p->term_no;
 * that looks like a copy/paste slip - confirm against the full file. */
120 memcpy(new_term_no, p->isam_p_buf,
121 p->isam_p_indx * sizeof(*p->term_no));
124 p->term_no = new_term_no;
/* info[0] is the payload length; the ISAM_P itself starts at info+1 */
127 assert(*info == sizeof(*p->isam_p_buf));
128 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
133 char term_tmp[IT_MAX_WORD];
135 const char *index_name;
/* split the key into ordinal (ord) and term (name+len) */
136 int len = key_SU_decode(&ord, (const unsigned char *) name);
138 zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
139 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
140 zebraExplain_lookup_ord(p->zh->reg->zei,
141 ord, 0 /* index_type */, &db, &index_name);
142 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
144 resultSetAddTerm(p->zh, p->termset, name[len], db,
145 index_name, term_tmp);
/* Adapter with an untyped client pointer (passed as the handler argument of
 * dict_lookup_grep() elsewhere in this file): casts p to struct grep_info
 * and forwards to add_isam_p(). */
151 static int grep_handle(char *name, const char *info, void *p)
153 return add_isam_p(name, info, (struct grep_info *) p);
/* Prepares *src for term parsing by skipping leading white space.
 * ct1 and ct2 are optional character sets: the current character is tested
 * against them with strchr() before the character map is consulted;
 * otherwise the input is mapped with zebra_maps_input() and compared with
 * CHR_SPACE.
 * Callers test the result as `!term_pre(...)`.
 * NOTE(review): the loop, the updates of *src and the return statement are
 * missing from this listing, so the exact return contract must be confirmed. */
156 static int term_pre(zebra_map_t zm, const char **src,
157 const char *ct1, const char *ct2, int first)
159 const char *s1, *s0 = *src;
162 /* skip white space */
165 if (ct1 && strchr(ct1, *s0))
167 if (ct2 && strchr(ct2, *s0))
170 map = zebra_maps_input(zm, &s1, strlen(s1), first);
171 if (**map != *CHR_SPACE)
/* Formats in_buf (in_size bytes) into out_buf as a readable dump for logging.
 * Each byte is appended as "HH:c " (two hex digits, a colon, a character).
 * Bytes outside 32..126 are special-cased for the character part (the
 * replacement assignment is on a line missing from this listing).
 * out_size must exceed 20; once the output comes within 20 bytes of
 * out_size, ".." is appended (presumably followed by a break - not shown). */
180 static void esc_str(char *out_buf, size_t out_size,
181 const char *in_buf, int in_size)
187 assert(out_size > 20);
189 for (k = 0; k<in_size; k++)
/* mask to 0..255 so bytes are not sign-extended */
191 int c = in_buf[k] & 0xff;
193 if (c < 32 || c > 126)
197 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
198 if (strlen(out_buf) > out_size-20)
200 strcat(out_buf, "..");
206 #define REGEX_CHARS " []()|.*+?!\"$"
/* Appends one non-space token, the bytes [start, end), to the term buffers.
 * The raw bytes are written to display_term.  For term_dict two forms are
 * visible: the raw characters, with a backslash put in front of any
 * character found in REGEX_CHARS, or the mapped form map[0].
 * NOTE(review): part of the parameter list and the condition selecting
 * between the two forms (presumably q_map_match) are on lines missing from
 * this listing. */
208 static void add_non_space(const char *start, const char *end,
211 const char **map, int q_map_match)
213 size_t sz = end - start;
215 wrbuf_write(display_term, start, sz);
220 if (strchr(REGEX_CHARS, *start))
221 wrbuf_putc(term_dict, '\\');
222 wrbuf_putc(term_dict, *start);
229 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
231 wrbuf_puts(term_dict, map[0]);
/* ICU counterpart of term_100: takes the next token from the tokenizer
 * started for zm (zebra_map_tokenize_next()).  When no token is returned,
 * *src is advanced to its terminating NUL.  The display form of the token is
 * appended to display_term.  The result buffer is appended to term_dict with
 * a backslash in front of every character found in REGEX_CHARS or equal to
 * a backslash; the byte 1 is also emitted before some characters (condition
 * not shown).  The visible code additionally shortens res_len by scanning
 * backwards for \x01 separators and appends ".*".
 * NOTE(review): callers pass 0 for "do not truncate" and 1 for "right
 * truncation" as the last argument, so the shortening and the ".*" are
 * presumably guarded by that flag; the guarding lines, the rest of the
 * parameter list and the returns are missing from this listing, and the
 * block comment inside the body has lost its terminator. */
236 static int term_100_icu(zebra_map_t zm,
237 const char **src, WRBUF term_dict, int space_split,
242 const char *res_buf = 0;
244 const char *display_buf;
246 if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
247 &display_buf, &display_len))
249 *src += strlen(*src);
252 wrbuf_write(display_term, display_buf, display_len);
255 /* ICU sort keys seem to be of the form
256 basechars \x01 accents \x01 length
257 For now we'll just right truncate from basechars . This
258 may give false hits due to accents not being used.
261 while (--i >= 0 && res_buf[i] != '\x01')
265 while (--i >= 0 && res_buf[i] != '\x01')
269 { /* did not find base chars at all. Throw error */
272 res_len = i; /* reduce res_len */
274 for (i = 0; i < res_len; i++)
276 if (strchr(REGEX_CHARS "\\", res_buf[i]))
277 wrbuf_putc(term_dict, '\\');
279 wrbuf_putc(term_dict, 1);
281 wrbuf_putc(term_dict, res_buf[i]);
284 wrbuf_puts(term_dict, ".*");
/* Converts a plain term (no truncation operators) at *src into a dictionary
 * regular expression in term_dict and its display form in display_term.
 * term_pre() is called with no special character sets; input is then mapped
 * with zebra_maps_search().  Non-space tokens go through add_non_space().
 * A space mapping is remembered in space_start/space_end ("save space
 * mapping for later") and re-emitted, regex-escaped, when more term content
 * follows ("reload last space").
 * NOTE(review): the loop structure, the space_split test and the returns
 * are on lines missing from this listing. */
288 /* term_100: handle term, where trunc = none(no operators at all) */
289 static int term_100(zebra_map_t zm,
290 const char **src, WRBUF term_dict, int space_split,
297 const char *space_start = 0;
298 const char *space_end = 0;
300 if (!term_pre(zm, src, NULL, NULL, !space_split))
307 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
310 if (**map == *CHR_SPACE)
313 else /* complete subfield only. */
315 if (**map == *CHR_SPACE)
316 { /* save space mapping for later .. */
321 else if (space_start)
322 { /* reload last space */
323 while (space_start < space_end)
325 if (strchr(REGEX_CHARS, *space_start))
326 wrbuf_putc(term_dict, '\\');
327 wrbuf_putc(display_term, *space_start);
328 wrbuf_putc(term_dict, *space_start);
333 space_start = space_end = 0;
338 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
/* Like term_100, but '#' is an operator: term_pre() is told to treat "#" as
 * special, and the visible special-character branch emits ".*" to term_dict
 * while copying the original character to display_term.  Other input is
 * mapped with zebra_maps_search(); in space_split mode a space mapping is
 * tested for, and non-space tokens go through add_non_space().
 * NOTE(review): the test selecting the '#' branch, the loop and the returns
 * are on lines missing from this listing. */
344 /* term_101: handle term, where trunc = Process # */
345 static int term_101(zebra_map_t zm,
346 const char **src, WRBUF term_dict, int space_split,
353 if (!term_pre(zm, src, "#", "#", !space_split))
361 wrbuf_puts(term_dict, ".*");
362 wrbuf_putc(display_term, *s0);
369 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
370 if (space_split && **map == *CHR_SPACE)
374 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
/* Converts a regular-expression term at *src into term_dict/display_term.
 * When errors is non-NULL and the term starts with "+d+" (d a decimal
 * digit) followed by more input, *errors is set to that digit's value.
 * Characters in the set ^ \ ( ) [ ] . * + ? | - are copied unchanged to
 * both buffers so they keep their regex meaning; everything else is mapped
 * with zebra_maps_search() and added via add_non_space().
 * NOTE(review): the loop, the skipping of the "+d+" prefix and the returns
 * are on lines missing from this listing. */
381 /* term_103: handle term, where trunc = re-2 (regular expressions) */
382 static int term_103(zebra_map_t zm, const char **src,
383 WRBUF term_dict, int *errors, int space_split,
390 if (!term_pre(zm, src, "^\\()[].*+?|", "(", !space_split))
393 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
394 isdigit(((const unsigned char *)s0)[1]))
396 *errors = s0[1] - '0';
403 if (strchr("^\\()[].*+?|-", *s0))
405 wrbuf_putc(display_term, *s0);
406 wrbuf_putc(term_dict, *s0);
414 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
415 if (space_split && **map == *CHR_SPACE)
419 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
427 /* term_102: handle term, where trunc = re-1 (regular expressions).
 * Delegates to term_103 with errors == NULL, so no error count is parsed. */
428 static int term_102(zebra_map_t zm, const char **src,
429 WRBUF term_dict, int space_split, WRBUF display_term)
431 return term_103(zm, src, term_dict, NULL, space_split, display_term);
435 /* term_104: handle term with the operators ?, * and # (the set passed to
 * term_pre below).
 * Visible handling: after an operator character a run of decimal digits is
 * parsed into a limit, with ".?" and ".*" emitted in that branch; one
 * operator produces ".*" and one produces "." in term_dict.  Each operator
 * and digit consumed is echoed to display_term.  Other input is mapped with
 * zebra_maps_search() and added via add_non_space().
 * NOTE(review): the tests that select which operator takes which branch,
 * the loops and the returns are on lines missing from this listing. */
436 static int term_104(zebra_map_t zm, const char **src,
437 WRBUF term_dict, int space_split, WRBUF display_term)
443 if (!term_pre(zm, src, "?*#", "?*#", !space_split))
451 wrbuf_putc(display_term, *s0);
453 if (*s0 >= '0' && *s0 <= '9')
456 while (*s0 >= '0' && *s0 <= '9')
458 limit = limit * 10 + (*s0 - '0');
459 wrbuf_putc(display_term, *s0);
466 wrbuf_puts(term_dict, ".?");
471 wrbuf_puts(term_dict, ".*");
477 wrbuf_puts(term_dict, ".*");
478 wrbuf_putc(display_term, *s0);
484 wrbuf_puts(term_dict, ".");
485 wrbuf_putc(display_term, *s0);
492 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
493 if (space_split && **map == *CHR_SPACE)
497 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
/* Shared implementation for truncation values 105 and 106: the callers pass
 * right_truncate = 1 for 105 and 0 for 106.  "*" and "!" are the special
 * characters given to term_pre(); the visible branches emit ".*" and "."
 * to term_dict and echo the operator to display_term.  Other input is
 * mapped with zebra_maps_search() and added via add_non_space().  A final
 * ".*" is appended at the end (presumably only when right_truncate is set).
 * NOTE(review): the operator tests, the loop, the right_truncate test and
 * the returns are on lines missing from this listing. */
504 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
505 static int term_105(zebra_map_t zm, const char **src,
506 WRBUF term_dict, int space_split,
507 WRBUF display_term, int right_truncate)
513 if (!term_pre(zm, src, "*!", "*!", !space_split))
521 wrbuf_puts(term_dict, ".*");
522 wrbuf_putc(display_term, *s0);
528 wrbuf_putc(term_dict, '.');
529 wrbuf_putc(display_term, *s0);
536 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
537 if (space_split && **map == *CHR_SPACE)
541 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
545 wrbuf_puts(term_dict, ".*");
551 /* gen_regular_rel - generate regular expression from relation
552 * val: border value (inclusive)
553 * islt: 1 if <=; 0 if >=.
 */
/* Appends to term_dict a regular expression matching decimal integers on
 * one side of val.  The pattern is assembled in the fixed-size local buffer
 * dst_buf.  The two visible openers are "(-[0-9]+|(" and "([0-9]+|-(",
 * each combining a catch-all for one sign with an alternation over digit
 * positions of the other.  val is formatted with "%d" and walked
 * digit by digit; the trailing loops add the "fewer digits" ("[0-9]?") or
 * "more digits" ("[0-9]" repeated, then "[0-9]*") alternatives.
 * NOTE(review): most of the digit-range construction and the conditions
 * choosing between the openers are on lines missing from this listing. */
555 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
557 char dst_buf[20*5*20]; /* assuming enough for expansion */
564 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
568 strcpy(dst, "(-[0-9]+|(");
576 strcpy(dst, "([0-9]+|-(");
587 sprintf(numstr, "%d", val);
588 for (w = strlen(numstr); --w >= 0; pos++)
607 strcpy(dst + dst_p, numstr);
608 dst_p = strlen(dst) - pos - 1;
636 for (i = 0; i<pos; i++)
649 /* match everything less than 10^(pos-1) */
651 for (i = 1; i<pos; i++)
652 strcat(dst, "[0-9]?");
656 /* match everything greater than 10^pos */
657 for (i = 0; i <= pos; i++)
658 strcat(dst, "[0-9]");
659 strcat(dst, "[0-9]*");
662 wrbuf_puts(term_dict, dst);
/* Copies one logical character from wsrc, starting at index *indx, to
 * term_p.  When the character is a backslash it is copied as well as the
 * character after it, so an escaped pair stays together.
 * NOTE(review): the increments of *indx are on lines missing from this
 * listing; callers loop with `for (i = 0; i < len; )` and pass &i, so they
 * depend on *indx being advanced here. */
665 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
667 const char *src = wrbuf_cstr(wsrc);
668 if (src[*indx] == '\\')
670 wrbuf_putc(term_p, src[*indx]);
673 wrbuf_putc(term_p, src[*indx]);
/*
678 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
679 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
680 * >= abc ([b-].*|a[c-].*|ab[c-].*)
681 * ([^-a].*|a[^-b].*|ab[c-].*)
682 * < abc ([-0].*|a[-a].*|ab[-b].*)
683 * ([^a-].*|a[^b-].*|ab[^c-].*)
684 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
685 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
 */
/* Expands the term at *term_sub into term_dict according to the relation
 * attribute (type 2) of zapt.
 * In every handled relation the term is first normalised by term_100() into
 * the scratch buffer term_component; if term_100() reports no term, the
 * scratch buffer is destroyed (the return itself is not shown).
 *   <, <=, >, >= : emit "(" alt "|" alt ... ")" where each alternative is a
 *                  prefix of the term, a character class built from the next
 *                  character, and ".*"; <= additionally accepts the term
 *                  itself and > accepts the term followed by ".+".
 *   =            : emits "(" term ")".
 *   always match : skips to the end of the term.
 *   otherwise    : *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE.
 * term_component is destroyed on every visible exit path.
 * Callers test the result as `!string_relation(...)`.
 * NOTE(review): the case labels, the inner prefix loops (variable j) and the
 * return statements are on lines missing from this listing; relation names
 * above are taken from the log messages. */
687 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
688 const char **term_sub, WRBUF term_dict,
689 const Odr_oid *attributeSet,
690 zebra_map_t zm, int space_split,
697 WRBUF term_component = wrbuf_alloc();
699 attr_init_APT(&relation, zapt, 2);
700 relation_value = attr_find(&relation, NULL);
703 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
704 switch (relation_value)
707 if (!term_100(zm, term_sub, term_component, space_split, display_term))
709 wrbuf_destroy(term_component);
712 yaz_log(log_level_rpn, "Relation <");
714 wrbuf_putc(term_dict, '(');
715 for (i = 0; i < wrbuf_len(term_component); )
720 wrbuf_putc(term_dict, '|');
722 string_rel_add_char(term_dict, term_component, &j);
724 wrbuf_putc(term_dict, '[');
726 wrbuf_putc(term_dict, '^');
728 wrbuf_putc(term_dict, 1);
729 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
731 string_rel_add_char(term_dict, term_component, &i);
732 wrbuf_putc(term_dict, '-');
734 wrbuf_putc(term_dict, ']');
735 wrbuf_putc(term_dict, '.');
736 wrbuf_putc(term_dict, '*');
738 wrbuf_putc(term_dict, ')');
741 if (!term_100(zm, term_sub, term_component, space_split, display_term))
743 wrbuf_destroy(term_component);
746 yaz_log(log_level_rpn, "Relation <=");
748 wrbuf_putc(term_dict, '(');
749 for (i = 0; i < wrbuf_len(term_component); )
754 string_rel_add_char(term_dict, term_component, &j);
755 wrbuf_putc(term_dict, '[');
757 wrbuf_putc(term_dict, '^');
759 wrbuf_putc(term_dict, 1);
760 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
762 string_rel_add_char(term_dict, term_component, &i);
763 wrbuf_putc(term_dict, '-');
765 wrbuf_putc(term_dict, ']');
766 wrbuf_putc(term_dict, '.');
767 wrbuf_putc(term_dict, '*');
769 wrbuf_putc(term_dict, '|');
/* final alternative for <= : the term itself */
771 for (i = 0; i < wrbuf_len(term_component); )
772 string_rel_add_char(term_dict, term_component, &i);
773 wrbuf_putc(term_dict, ')');
776 if (!term_100(zm, term_sub, term_component, space_split, display_term))
778 wrbuf_destroy(term_component);
781 yaz_log(log_level_rpn, "Relation >");
783 wrbuf_putc(term_dict, '(');
784 for (i = 0; i < wrbuf_len(term_component); )
789 string_rel_add_char(term_dict, term_component, &j);
790 wrbuf_putc(term_dict, '[');
792 wrbuf_putc(term_dict, '^');
793 wrbuf_putc(term_dict, '-');
794 string_rel_add_char(term_dict, term_component, &i);
796 wrbuf_putc(term_dict, ']');
797 wrbuf_putc(term_dict, '.');
798 wrbuf_putc(term_dict, '*');
800 wrbuf_putc(term_dict, '|');
/* final alternative for > : the term followed by at least one character */
802 for (i = 0; i < wrbuf_len(term_component); )
803 string_rel_add_char(term_dict, term_component, &i);
804 wrbuf_putc(term_dict, '.');
805 wrbuf_putc(term_dict, '+');
806 wrbuf_putc(term_dict, ')');
809 if (!term_100(zm, term_sub, term_component, space_split, display_term))
811 wrbuf_destroy(term_component);
814 yaz_log(log_level_rpn, "Relation >=");
816 wrbuf_putc(term_dict, '(');
817 for (i = 0; i < wrbuf_len(term_component); )
822 wrbuf_putc(term_dict, '|');
824 string_rel_add_char(term_dict, term_component, &j);
825 wrbuf_putc(term_dict, '[');
/* the last character gets a different class form than the earlier ones */
827 if (i < wrbuf_len(term_component)-1)
829 wrbuf_putc(term_dict, '^');
830 wrbuf_putc(term_dict, '-');
831 string_rel_add_char(term_dict, term_component, &i);
835 string_rel_add_char(term_dict, term_component, &i);
836 wrbuf_putc(term_dict, '-');
838 wrbuf_putc(term_dict, ']');
839 wrbuf_putc(term_dict, '.');
840 wrbuf_putc(term_dict, '*');
842 wrbuf_putc(term_dict, ')');
849 yaz_log(log_level_rpn, "Relation =");
850 if (!term_100(zm, term_sub, term_component, space_split, display_term))
852 wrbuf_destroy(term_component);
855 wrbuf_puts(term_dict, "(");
856 wrbuf_puts(term_dict, wrbuf_cstr(term_component));
857 wrbuf_puts(term_dict, ")");
860 yaz_log(log_level_rpn, "Relation always matches");
861 /* skip to end of term (we don't care what it is) */
862 while (**term_sub != '\0')
866 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
867 wrbuf_destroy(term_component);
870 wrbuf_destroy(term_component);
/* Forward declaration of string_term(): it is called by search_term()
 * before its definition further down in this file.
 * NOTE(review): some parameter lines are missing from this listing. */
874 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
875 const char **term_sub,
877 const Odr_oid *attributeSet, NMEM stream,
878 struct grep_info *grep_info,
879 const char *index_type, int complete_flag,
881 const char *xpath_use,
882 struct ord_list **ol,
/* Extracts the per-term hit limit and term reference id from zapt.
 *   hits_limit_value (out): value of attribute type 11, then normalised:
 *       -1 (not given) becomes zh->approx_limit when a term reference id is
 *          present, otherwise 0 (no counting);
 *        0 becomes zh->approx_limit (same as the global limit).
 *   term_ref_id_str (out): value of attribute type 10.  A numeric id (>= 0)
 *       is formatted with "%d" into a 20-byte buffer from nmem_malloc().
 * The result is logged at debug level.
 * NOTE(review): the last parameter (the nmem used below) and the return
 * statement are on lines missing from this listing. */
885 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
886 Z_AttributesPlusTerm *zapt,
887 zint *hits_limit_value,
888 const char **term_ref_id_str,
891 AttrType term_ref_id_attr;
892 AttrType hits_limit_attr;
895 attr_init_APT(&hits_limit_attr, zapt, 11);
896 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
898 attr_init_APT(&term_ref_id_attr, zapt, 10);
899 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
900 if (term_ref_id_int >= 0)
902 char *res = nmem_malloc(nmem, 20);
903 sprintf(res, "%d", term_ref_id_int);
904 *term_ref_id_str = res;
907 /* no limit given ? */
908 if (*hits_limit_value == -1)
910 if (*term_ref_id_str)
912 /* use global if term_ref is present */
913 *hits_limit_value = zh->approx_limit;
917 /* no counting if term_ref is not present */
918 *hits_limit_value = 0;
921 else if (*hits_limit_value == 0)
923 /* 0 is the same as global limit */
924 *hits_limit_value = zh->approx_limit;
926 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
927 *term_ref_id_str ? *term_ref_id_str : "none",
932 /** \brief search for term (which may be truncated) */
/* Searches the dictionary for one term of the query and builds its result
 * set.
 * Steps visible here: allocate the term_dict and display_term buffers,
 * fetch the hit limit / term reference id with zebra_term_limits_APT(),
 * reset grep_info->isam_p_indx, run string_term() to collect matching
 * ISAM pointers, then - when that succeeded and *term_sub is non-NULL -
 * combine them into *rset with rset_trunc().  Both buffers are destroyed
 * before returning.
 * NOTE(review): some parameters (including rset, rset_nmem and zm) and the
 * return statement are on lines missing from this listing. */
934 static ZEBRA_RES search_term(ZebraHandle zh,
935 Z_AttributesPlusTerm *zapt,
936 const char **term_sub,
937 const Odr_oid *attributeSet, NMEM stream,
938 struct grep_info *grep_info,
939 const char *index_type, int complete_flag,
940 const char *rank_type,
941 const char *xpath_use,
944 struct rset_key_control *kc,
949 zint hits_limit_value;
950 const char *term_ref_id_str = 0;
951 WRBUF term_dict = wrbuf_alloc();
952 WRBUF display_term = wrbuf_alloc();
954 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
956 grep_info->isam_p_indx = 0;
957 res = string_term(zh, zapt, term_sub, term_dict,
958 attributeSet, stream, grep_info,
959 index_type, complete_flag,
960 display_term, xpath_use, &ol, zm);
961 wrbuf_destroy(term_dict);
962 if (res == ZEBRA_OK && *term_sub)
964 yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
965 *rset = rset_trunc(zh, grep_info->isam_p_buf,
966 grep_info->isam_p_indx, wrbuf_buf(display_term),
967 wrbuf_len(display_term), rank_type,
968 1 /* preserve pos */,
969 zapt->term->which, rset_nmem,
970 kc, kc->scope, ol, index_type, hits_limit_value,
975 wrbuf_destroy(display_term);
/* Builds the dictionary regular expression for one term and runs it.
 * Steps visible here:
 *  1. create the ordinal list *ol and install the character-map handler;
 *  2. read the truncation attribute (type 5);
 *  3. resolve the index ordinal with zebra_apt_get_ord(), append it to *ol
 *     and emit it into term_dict as "(" <1,byte>... ")" - each ordinal byte
 *     is preceded by the internal regexp escape character 1;
 *  4. append the term expression: for ICU maps only "not specified"/100 and
 *     1 (right truncation) are handled via term_100_icu(); otherwise the
 *     truncation value selects string_relation() (-1/100), term_100() with
 *     ".*" added on the right, left or both sides (1/2/3), or
 *     term_101..term_105 (101..106).  Unknown values raise
 *     YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE;
 *  5. log the expression and call dict_lookup_grep() with grep_info as the
 *     accumulator; ord_len is passed as the number of "exact" chars.
 * NOTE(review): several parameters, the handling when a term_* helper
 * reports no term, the tests on r and the returns are on lines missing from
 * this listing. */
979 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
980 const char **term_sub,
982 const Odr_oid *attributeSet, NMEM stream,
983 struct grep_info *grep_info,
984 const char *index_type, int complete_flag,
986 const char *xpath_use,
987 struct ord_list **ol,
992 int truncation_value;
994 struct rpn_char_map_info rcmi;
/* complete (phrase) fields are not split on spaces */
996 int space_split = complete_flag ? 0 : 1;
999 int max_pos, prefix_len = 0;
1004 *ol = ord_list_create(stream);
1006 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1007 attr_init_APT(&truncation, zapt, 5);
1008 truncation_value = attr_find(&truncation, NULL);
1009 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1011 termp = *term_sub; /* start of term for each database */
1013 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1014 attributeSet, &ord) != ZEBRA_OK)
1020 wrbuf_rewind(term_dict); /* new dictionary regexp term */
1022 *ol = ord_list_append(stream, *ol, ord);
1023 ord_len = key_SU_encode(ord, ord_buf);
1025 wrbuf_putc(term_dict, '(');
1027 for (i = 0; i<ord_len; i++)
1029 wrbuf_putc(term_dict, 1); /* our internal regexp escape char */
1030 wrbuf_putc(term_dict, ord_buf[i]);
1032 wrbuf_putc(term_dict, ')');
/* remember where the term part starts (used for the debug dump below) */
1034 prefix_len = wrbuf_len(term_dict);
1036 if (zebra_maps_is_icu(zm))
1039 switch (truncation_value)
1041 case -1: /* not specified */
1042 case 100: /* do not truncate */
1043 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 0))
1049 case 1: /* right truncation */
1050 if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1))
1057 zebra_setError_zint(zh,
1058 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1065 /* non-ICU case. using string.chr and friends */
1066 switch (truncation_value)
1068 case -1: /* not specified */
1069 case 100: /* do not truncate */
1070 if (!string_relation(zh, zapt, &termp, term_dict,
1072 zm, space_split, display_term,
1077 zebra_setError(zh, relation_error, 0);
1084 case 1: /* right truncation */
1085 wrbuf_putc(term_dict, '(');
1086 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1091 wrbuf_puts(term_dict, ".*)");
1093 case 2: /* left truncation */
1094 wrbuf_puts(term_dict, "(.*");
1095 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1100 wrbuf_putc(term_dict, ')');
1102 case 3: /* left&right truncation */
1103 wrbuf_puts(term_dict, "(.*");
1104 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1109 wrbuf_puts(term_dict, ".*)");
1111 case 101: /* process # in term */
1112 wrbuf_putc(term_dict, '(');
1113 if (!term_101(zm, &termp, term_dict, space_split, display_term))
1118 wrbuf_puts(term_dict, ")");
1120 case 102: /* Regexp-1 */
1121 wrbuf_putc(term_dict, '(');
1122 if (!term_102(zm, &termp, term_dict, space_split, display_term))
1127 wrbuf_putc(term_dict, ')');
1129 case 103: /* Regexp-2 */
1131 wrbuf_putc(term_dict, '(');
/* NOTE(review): the fourth argument on the next line is garbled in this
 * listing; term_103 takes an int pointer there and regex_range is later
 * passed to dict_lookup_grep, so it is presumably the address of
 * regex_range - confirm against the original file. */
1132 if (!term_103(zm, &termp, term_dict, ®ex_range,
1133 space_split, display_term))
1138 wrbuf_putc(term_dict, ')');
1140 case 104: /* process # and ! in term */
1141 wrbuf_putc(term_dict, '(');
1142 if (!term_104(zm, &termp, term_dict, space_split, display_term))
1147 wrbuf_putc(term_dict, ')');
1149 case 105: /* process * and ! in term */
1150 wrbuf_putc(term_dict, '(');
1151 if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1156 wrbuf_putc(term_dict, ')');
1158 case 106: /* process * and ! in term */
1159 wrbuf_putc(term_dict, '(');
1160 if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1165 wrbuf_putc(term_dict, ')');
1168 zebra_setError_zint(zh,
1169 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1177 const char *input = wrbuf_cstr(term_dict) + prefix_len;
1178 esc_str(buf, sizeof(buf), input, strlen(input));
1181 WRBUF pr_wr = wrbuf_alloc();
1183 wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1184 yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1185 wrbuf_destroy(pr_wr);
1187 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1188 grep_info, &max_pos,
1189 ord_len /* number of "exact" chars */,
1192 zebra_set_partial_result(zh);
1194 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1196 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Releases the buffers held by grep_info: term_no and isam_p_buf are freed
 * with xfree().  The struct itself is not freed here.
 * NOTE(review): lines around the two calls are missing from this listing
 * (possibly preprocessor guards). */
1202 static void grep_info_delete(struct grep_info *grep_info)
1205 xfree(grep_info->term_no);
1207 xfree(grep_info->isam_p_buf);
/* Initialises grep_info for a new term search.
 *  - buffers start empty (term_no = 0, isam_p_buf = NULL, isam_p_size = 0);
 *  - trunc_max comes from the "truncmax" resource (default "10000") and is
 *    overridden by attribute type 13 of zapt when that attribute is present;
 *  - index_type is stored and termset is cleared;
 *  - attribute type 8 (termset) is examined: one visible path reports
 *    YAZ_BIB1_UNSUPP_SEARCH "termset"; the other creates a result set named
 *    after the numeric value (formatted with "%d") or the string value,
 *    reporting YAZ_BIB1_ILLEGAL_RESULT_SET_NAME if resultSetAdd() fails.
 * The caller compares the result with ZEBRA_FAIL.
 * NOTE(review): which termset path is active presumably depends on the
 * TERMSET_DISABLE define; the preprocessor lines, the test on zapt and the
 * returns are missing from this listing. */
1210 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1211 Z_AttributesPlusTerm *zapt,
1212 struct grep_info *grep_info,
1213 const char *index_type)
1216 grep_info->term_no = 0;
1218 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1219 grep_info->isam_p_size = 0;
1220 grep_info->isam_p_buf = NULL;
1222 grep_info->index_type = index_type;
1223 grep_info->termset = 0;
1229 attr_init_APT(&truncmax, zapt, 13);
1230 truncmax_value = attr_find(&truncmax, NULL);
1231 if (truncmax_value != -1)
1232 grep_info->trunc_max = truncmax_value;
1237 int termset_value_numeric;
1238 const char *termset_value_string;
1240 attr_init_APT(&termset, zapt, 8);
1241 termset_value_numeric =
1242 attr_find_ex(&termset, NULL, &termset_value_string);
1243 if (termset_value_numeric != -1)
1246 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1250 const char *termset_name = 0;
/* -2 selects the string form of the attribute value (see the else path) */
1251 if (termset_value_numeric != -2)
1254 sprintf(resname, "%d", termset_value_numeric);
1255 termset_name = resname;
1258 termset_name = termset_value_string;
1259 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1260 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1261 if (!grep_info->termset)
1263 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
/* Splits termz into terms and creates one result set per term.
 * grep_info is prepared once and reused for every term.  The *result_sets
 * array is grown on demand with nmem_malloc() from stream (old entries are
 * copied over).  Each term is handled by search_term(), which advances
 * termp.  On failure all result sets created so far are deleted and
 * grep_info is released.  A null result set ends the list (the entry is not
 * counted).  grep_info is released before the normal return as well.
 * NOTE(review): the loop statement, some parameters and the returns are on
 * lines missing from this listing. */
1272 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1273 Z_AttributesPlusTerm *zapt,
1275 const Odr_oid *attributeSet,
1277 const char *index_type, int complete_flag,
1278 const char *rank_type,
1279 const char *xpath_use,
1281 RSET **result_sets, int *num_result_sets,
1282 struct rset_key_control *kc,
1285 struct grep_info grep_info;
1286 const char *termp = termz;
1289 *num_result_sets = 0;
1290 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
/* array full: allocate a larger one and copy the existing entries */
1296 if (alloc_sets == *num_result_sets)
1299 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1302 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1303 alloc_sets = alloc_sets + add;
1304 *result_sets = rnew;
1306 res = search_term(zh, zapt, &termp, attributeSet,
1308 index_type, complete_flag,
1310 xpath_use, rset_nmem,
1311 &(*result_sets)[*num_result_sets],
1313 if (res != ZEBRA_OK)
1316 for (i = 0; i < *num_result_sets; i++)
1317 rset_delete((*result_sets)[i]);
1318 grep_info_delete(&grep_info);
1321 if ((*result_sets)[*num_result_sets] == 0)
1323 (*num_result_sets)++;
1328 grep_info_delete(&grep_info);
/**
1333 \brief Create result set(s) for list of terms
1334 \param zh Zebra Handle
1335 \param zapt Attributes Plus Term (RPN leaf)
1336 \param termz term as used in query but converted to UTF-8
1337 \param attributeSet default attribute set
1338 \param stream memory for result
1339 \param index_type register type ("w", "p",..)
1340 \param complete_flag whether it's phrases or not
1341 \param rank_type term flags for ranking
1342 \param xpath_use use attribute for X-Path (-1 for no X-path)
1343 \param rset_nmem memory for result sets
1344 \param result_sets output result set for each term in list (output)
1345 \param num_result_sets number of output result sets
1346 \param kc rset key control to be used for created result sets
*/
/* Entry point for turning a term string into a list of result sets.
 * Looks up (or creates) the zebra map for index_type; for ICU maps the
 * tokenizer is primed with the whole of termz before delegating, so that
 * the per-term code can pull tokens with zebra_map_tokenize_next().
 * All remaining work is done by search_terms_chrmap().
 * NOTE(review): some parameter lines and the final argument of the
 * delegated call are missing from this listing. */
1348 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1349 Z_AttributesPlusTerm *zapt,
1351 const Odr_oid *attributeSet,
1353 const char *index_type, int complete_flag,
1354 const char *rank_type,
1355 const char *xpath_use,
1357 RSET **result_sets, int *num_result_sets,
1358 struct rset_key_control *kc)
1360 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1361 if (zebra_maps_is_icu(zm))
1362 zebra_map_tokenize_start(zm, termz, strlen(termz));
1363 return search_terms_chrmap(zh, zapt, termz, attributeSet,
1364 stream, index_type, complete_flag,
1365 rank_type, xpath_use,
1366 rset_nmem, result_sets, num_result_sets,
1371 /** \brief limit a search by position - returns result set */
/* Produces a result set that restricts a search by the position attribute
 * (type 3) of zapt.
 * Unsupported position values, or an index whose map does not support
 * first-in-field, raise YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE.  Otherwise the
 * dictionary key formed by the encoded index ordinal followed by
 * FIRST_IN_FIELD_STR is looked up; when found, its payload (size byte, then
 * an ISAM_P) is turned into *rset with zebra_create_rset_isam().
 * term_dict is a fixed 100-byte local holding that key.
 * NOTE(review): the case labels, the test on val, some parameters and the
 * returns are on lines missing from this listing. */
1373 static ZEBRA_RES search_position(ZebraHandle zh,
1374 Z_AttributesPlusTerm *zapt,
1375 const Odr_oid *attributeSet,
1376 const char *index_type,
1379 struct rset_key_control *kc)
1385 char term_dict[100];
1389 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1391 attr_init_APT(&position, zapt, 3);
1392 position_value = attr_find(&position, NULL);
1393 switch(position_value)
1402 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1408 if (!zebra_maps_is_first_in_field(zm))
1410 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1415 if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1416 attributeSet, &ord) != ZEBRA_OK)
/* key = encoded ordinal + first-in-field marker */
1420 ord_len = key_SU_encode(ord, ord_buf);
1421 memcpy(term_dict, ord_buf, ord_len);
1422 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1423 val = dict_lookup(zh->reg->dict, term_dict);
1426 assert(*val == sizeof(ISAM_P));
1427 memcpy(&isam_p, val+1, sizeof(isam_p));
1429 *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope,
1435 /** \brief returns result set for phrase search */
/* Builds the result set for a phrase: one result set per term (via
 * search_terms_list()), optionally preceded by the position result set
 * from search_position(), combined with an ordered proximity operator
 * (relation 3, distance 1).
 * If search_position() fails, the per-term result sets are deleted.
 * With no result sets a null rset is returned; with exactly one, that set
 * is returned unchanged.
 * NOTE(review): the test that decides whether first_set is prepended, the
 * matching count update, some parameters and the returns are on lines
 * missing from this listing. */
1437 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1438 Z_AttributesPlusTerm *zapt,
1439 const char *termz_org,
1440 const Odr_oid *attributeSet,
1442 const char *index_type,
1444 const char *rank_type,
1445 const char *xpath_use,
1448 struct rset_key_control *kc)
1450 RSET *result_sets = 0;
1451 int num_result_sets = 0;
1453 search_terms_list(zh, zapt, termz_org, attributeSet,
1454 stream, index_type, complete_flag,
1455 rank_type, xpath_use,
1457 &result_sets, &num_result_sets, kc);
1459 if (res != ZEBRA_OK)
1462 if (num_result_sets > 0)
1465 res = search_position(zh, zapt, attributeSet,
1467 rset_nmem, &first_set,
1469 if (res != ZEBRA_OK)
1472 for (i = 0; i<num_result_sets; i++)
1473 rset_delete(result_sets[i]);
/* new array with first_set in slot 0, followed by the term result sets */
1478 RSET *nsets = nmem_malloc(stream,
1479 sizeof(RSET) * (num_result_sets+1));
1480 nsets[0] = first_set;
1481 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1482 result_sets = nsets;
1486 if (num_result_sets == 0)
1487 *rset = rset_create_null(rset_nmem, kc, 0);
1488 else if (num_result_sets == 1)
1489 *rset = result_sets[0];
1491 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1492 num_result_sets, result_sets,
1493 1 /* ordered */, 0 /* exclusion */,
1494 3 /* relation */, 1 /* distance */);
1500 /** \brief returns result set for or-list search */
/* Builds the result set for an or-list: one result set per term (via
 * search_terms_list()).  For each term a position result set is obtained
 * from search_position(); the visible code pairs it with the term's result
 * set in an ordered proximity set (relation 3, distance 1) that replaces
 * the term's entry.  If search_position() fails, all per-term result sets
 * are deleted.  The entries are finally OR-ed together; with no entries a
 * null rset is returned and a single entry is returned unchanged.
 * NOTE(review): the test that decides whether the pairing happens
 * (presumably on first_set), some parameters and the returns are on lines
 * missing from this listing. */
1502 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1503 Z_AttributesPlusTerm *zapt,
1504 const char *termz_org,
1505 const Odr_oid *attributeSet,
1507 const char *index_type,
1509 const char *rank_type,
1510 const char *xpath_use,
1513 struct rset_key_control *kc)
1515 RSET *result_sets = 0;
1516 int num_result_sets = 0;
1519 search_terms_list(zh, zapt, termz_org, attributeSet,
1520 stream, index_type, complete_flag,
1521 rank_type, xpath_use,
1523 &result_sets, &num_result_sets, kc);
1524 if (res != ZEBRA_OK)
1527 for (i = 0; i<num_result_sets; i++)
1530 res = search_position(zh, zapt, attributeSet,
1532 rset_nmem, &first_set,
1534 if (res != ZEBRA_OK)
1536 for (i = 0; i<num_result_sets; i++)
1537 rset_delete(result_sets[i]);
1545 tmp_set[0] = first_set;
1546 tmp_set[1] = result_sets[i];
1548 result_sets[i] = rset_create_prox(
1549 rset_nmem, kc, kc->scope,
1551 1 /* ordered */, 0 /* exclusion */,
1552 3 /* relation */, 1 /* distance */);
1555 if (num_result_sets == 0)
1556 *rset = rset_create_null(rset_nmem, kc, 0);
1557 else if (num_result_sets == 1)
1558 *rset = result_sets[0];
1560 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1561 num_result_sets, result_sets);
1567 /** \brief returns result set for and-list search */
/* Builds the result set for an and-list.  Same structure as
 * rpn_search_APT_or_list(): one result set per term, each paired with the
 * position result set from search_position() in an ordered proximity set
 * (relation 3, distance 1), with all per-term sets deleted if
 * search_position() fails.  The entries are finally AND-ed together; with
 * no entries a null rset is returned and a single entry is returned
 * unchanged.
 * NOTE(review): the test that decides whether the pairing happens
 * (presumably on first_set), some parameters and the returns are on lines
 * missing from this listing. */
1569 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1570 Z_AttributesPlusTerm *zapt,
1571 const char *termz_org,
1572 const Odr_oid *attributeSet,
1574 const char *index_type,
1576 const char *rank_type,
1577 const char *xpath_use,
1580 struct rset_key_control *kc)
1582 RSET *result_sets = 0;
1583 int num_result_sets = 0;
1586 search_terms_list(zh, zapt, termz_org, attributeSet,
1587 stream, index_type, complete_flag,
1588 rank_type, xpath_use,
1590 &result_sets, &num_result_sets,
1592 if (res != ZEBRA_OK)
1594 for (i = 0; i<num_result_sets; i++)
1597 res = search_position(zh, zapt, attributeSet,
1599 rset_nmem, &first_set,
1601 if (res != ZEBRA_OK)
1603 for (i = 0; i<num_result_sets; i++)
1604 rset_delete(result_sets[i]);
1612 tmp_set[0] = first_set;
1613 tmp_set[1] = result_sets[i];
1615 result_sets[i] = rset_create_prox(
1616 rset_nmem, kc, kc->scope,
1618 1 /* ordered */, 0 /* exclusion */,
1619 3 /* relation */, 1 /* distance */);
1624 if (num_result_sets == 0)
1625 *rset = rset_create_null(rset_nmem, kc, 0);
1626 else if (num_result_sets == 1)
1627 *rset = result_sets[0];
1629 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1630 num_result_sets, result_sets);
/* Expands a numeric term into term_dict according to the relation
 * attribute (type 2) of zapt and runs the dictionary grep.
 * The term is normalised by term_100() (with space_split = 1) into the
 * scratch buffer term_num and converted with atoi().  Bounds handed to
 * gen_regular_rel() are inclusive, so strict relations adjust by one:
 *   <  : gen_regular_rel(value-1, islt=1)     <= : (value,   islt=1)
 *   >= : gen_regular_rel(value,   islt=0)     >  : (value+1, islt=0)
 *   =  : "(0*<value>)", i.e. the value with optional leading zeros.
 * The "always matches" path skips the rest of the term; unknown relations
 * set *error_code to YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE.
 * Finally dict_lookup_grep() is called with grep_handle; depending on its
 * result a partial-result flag is set or a warning is logged.
 * term_num is destroyed on every visible exit path.
 * NOTE(review): the case labels, the tests on r, some parameters and the
 * returns are on lines missing from this listing.  atoi() gives no error
 * indication for non-numeric input. */
1636 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1637 const char **term_sub,
1639 const Odr_oid *attributeSet,
1640 struct grep_info *grep_info,
1650 WRBUF term_num = wrbuf_alloc();
1653 attr_init_APT(&relation, zapt, 2);
1654 relation_value = attr_find(&relation, NULL);
1656 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1658 switch (relation_value)
1661 yaz_log(log_level_rpn, "Relation <");
1662 if (!term_100(zm, term_sub, term_num, 1, display_term))
1664 wrbuf_destroy(term_num);
1667 term_value = atoi(wrbuf_cstr(term_num));
1668 gen_regular_rel(term_dict, term_value-1, 1);
1671 yaz_log(log_level_rpn, "Relation <=");
1672 if (!term_100(zm, term_sub, term_num, 1, display_term))
1674 wrbuf_destroy(term_num);
1677 term_value = atoi(wrbuf_cstr(term_num));
1678 gen_regular_rel(term_dict, term_value, 1);
1681 yaz_log(log_level_rpn, "Relation >=");
1682 if (!term_100(zm, term_sub, term_num, 1, display_term))
1684 wrbuf_destroy(term_num);
1687 term_value = atoi(wrbuf_cstr(term_num));
1688 gen_regular_rel(term_dict, term_value, 0);
1691 yaz_log(log_level_rpn, "Relation >");
1692 if (!term_100(zm, term_sub, term_num, 1, display_term))
1694 wrbuf_destroy(term_num);
1697 term_value = atoi(wrbuf_cstr(term_num));
1698 gen_regular_rel(term_dict, term_value+1, 0);
1702 yaz_log(log_level_rpn, "Relation =");
1703 if (!term_100(zm, term_sub, term_num, 1, display_term))
1705 wrbuf_destroy(term_num);
1708 term_value = atoi(wrbuf_cstr(term_num));
1709 wrbuf_printf(term_dict, "(0*%d)", term_value);
1712 /* term_tmp untouched.. */
1713 while (**term_sub != '\0')
1717 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1718 wrbuf_destroy(term_num);
1721 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1722 0, grep_info, max_pos, 0, grep_handle);
1725 zebra_set_partial_result(zh);
1727 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1728 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1729 wrbuf_destroy(term_num);
/* Numeric counterpart of string_term(): resolves the index ordinal for the
 * term, emits it into term_dict as "(" <1,byte>... ")" (each ordinal byte
 * preceded by the escape character 1), then lets numeric_relation() append
 * the numeric expression and perform the dictionary grep.  The ordinal is
 * also appended to the list *ol, which is created here from stream.
 * When numeric_relation() reports failure, a non-zero relation_error is
 * raised with zebra_setError().
 * NOTE(review): some parameters, the test on relation_error and the
 * returns are on lines missing from this listing. */
1733 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1734 const char **term_sub,
1736 const Odr_oid *attributeSet, NMEM stream,
1737 struct grep_info *grep_info,
1738 const char *index_type, int complete_flag,
1740 const char *xpath_use,
1741 struct ord_list **ol)
1744 struct rpn_char_map_info rcmi;
1746 int relation_error = 0;
1747 int ord, ord_len, i;
1749 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1751 *ol = ord_list_create(stream);
1753 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1757 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1758 attributeSet, &ord) != ZEBRA_OK)
1763 wrbuf_rewind(term_dict);
1765 *ol = ord_list_append(stream, *ol, ord);
1767 ord_len = key_SU_encode(ord, ord_buf);
1769 wrbuf_putc(term_dict, '(');
1770 for (i = 0; i < ord_len; i++)
1772 wrbuf_putc(term_dict, 1);
1773 wrbuf_putc(term_dict, ord_buf[i]);
1775 wrbuf_putc(term_dict, ')');
1777 if (!numeric_relation(zh, zapt, &termp, term_dict,
1778 attributeSet, grep_info, &max_pos, zm,
1779 display_term, &relation_error))
1783 zebra_setError(zh, relation_error, 0);
1790 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* rpn_search_APT_numeric: numeric search strategy for one APT.
 *
 * Starting at termz, each sub-term is passed to numeric_term; the ISAM
 * positions collected in grep_info are then turned into a result set
 * with rset_trunc and stored in result_sets[num_result_sets].
 * Processing stops when numeric_term fails or leaves termp at 0.
 *
 * The final *rset is:
 *   - a null result set when no sub-term produced a set,
 *   - the single set when exactly one was produced,
 *   - otherwise the AND combination built by rset_create_and.
 *
 * NOTE(review): the loop construct, the counter increment and the
 * return statements are not part of this excerpt.
 */
1795 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1796 Z_AttributesPlusTerm *zapt,
1798 const Odr_oid *attributeSet,
1800 const char *index_type,
1802 const char *rank_type,
1803 const char *xpath_use,
1806 struct rset_key_control *kc)
1808 const char *termp = termz;
1809 RSET *result_sets = 0;
1810 int num_result_sets = 0;
1812 struct grep_info grep_info;
1814 zint hits_limit_value;
1815 const char *term_ref_id_str = 0;
1817 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1820 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1821 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1825 struct ord_list *ol;
1826 WRBUF term_dict = wrbuf_alloc();
1827 WRBUF display_term = wrbuf_alloc();
/* result set array is full: get a larger one from stream and copy */
1828 if (alloc_sets == num_result_sets)
1831 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1834 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1835 alloc_sets = alloc_sets + add;
1838 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* each sub-term starts with an empty position list */
1839 grep_info.isam_p_indx = 0;
1840 res = numeric_term(zh, zapt, &termp, term_dict,
1841 attributeSet, stream, &grep_info,
1842 index_type, complete_flag,
1843 display_term, xpath_use, &ol);
1844 wrbuf_destroy(term_dict);
1845 if (res == ZEBRA_FAIL || termp == 0)
1847 wrbuf_destroy(display_term);
1850 yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
1851 result_sets[num_result_sets] =
1852 rset_trunc(zh, grep_info.isam_p_buf,
1853 grep_info.isam_p_indx, wrbuf_buf(display_term),
1854 wrbuf_len(display_term), rank_type,
1855 0 /* preserve position */,
1856 zapt->term->which, rset_nmem,
1857 kc, kc->scope, ol, index_type,
1860 wrbuf_destroy(display_term);
1861 if (!result_sets[num_result_sets])
1867 grep_info_delete(&grep_info);
1869 if (res != ZEBRA_OK)
1871 if (num_result_sets == 0)
1872 *rset = rset_create_null(rset_nmem, kc, 0);
1873 else if (num_result_sets == 1)
1874 *rset = result_sets[0];
1876 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1877 num_result_sets, result_sets);
/* rpn_search_APT_local: search by local record number.
 *
 * The term is parsed as a system number with atozint and the record is
 * fetched with rec_get. Two outcomes are visible in this block:
 *   - *rset becomes a null result set, or
 *   - *rset becomes a temporary result set (temp directory taken from
 *     the setTmpDir resource) that is opened for writing and receives
 *     one key through rset_write.
 *
 * NOTE(review): the condition choosing between the two outcomes and the
 * construction of key are not part of this excerpt; presumably the null
 * set is used when rec_get finds no record - confirm.
 */
1883 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1884 Z_AttributesPlusTerm *zapt,
1886 const Odr_oid *attributeSet,
1888 const char *rank_type, NMEM rset_nmem,
1890 struct rset_key_control *kc)
1893 zint sysno = atozint(termz);
1897 rec = rec_get(zh->reg->records, sysno);
1905 *rset = rset_create_null(rset_nmem, kc, 0);
1911 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1912 res_get(zh->res, "setTmpDir"), 0);
1913 rsfd = rset_open(*rset, RSETF_WRITE);
1918 rset_write(rsfd, &key);
/* rpn_sort_spec: turn an APT carrying a sort attribute into a sort key
 * specification stored in sort_sequence.
 *
 * Visible behaviour:
 *   - attribute type 7 is read to obtain the sort relation value;
 *   - on first use sort_sequence->specs is allocated from stream with
 *     10 slots, all set to 0;
 *   - the general term is converted to an integer slot index i with
 *     atoi_n;
 *   - a Z_SortKeySpec is built from stream memory: generic sort element,
 *     sortAttributes key holding a copy of attributeSet and the
 *     attribute list of zapt, case sensitivity 0, missing value null;
 *   - relation value 1 gives ascending, 2 gives descending, every other
 *     value falls back to ascending;
 *   - the spec is stored in specs[i] and *rset is set to a null result
 *     set.
 *
 * NOTE(review): only an upper bound test on i (i >= num_specs) is
 * visible here, and the action taken is not part of this excerpt. If
 * atoi_n can yield a negative value, specs[i] would be indexed out of
 * range - confirm.
 */
1924 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1925 const Odr_oid *attributeSet, NMEM stream,
1926 Z_SortKeySpecList *sort_sequence,
1927 const char *rank_type,
1930 struct rset_key_control *kc)
1933 int sort_relation_value;
1934 AttrType sort_relation_type;
1939 attr_init_APT(&sort_relation_type, zapt, 7);
1940 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* lazily create the table of sort specs */
1942 if (!sort_sequence->specs)
1944 sort_sequence->num_specs = 10;
1945 sort_sequence->specs = (Z_SortKeySpec **)
1946 nmem_malloc(stream, sort_sequence->num_specs *
1947 sizeof(*sort_sequence->specs));
1948 for (i = 0; i<sort_sequence->num_specs; i++)
1949 sort_sequence->specs[i] = 0;
1951 if (zapt->term->which != Z_Term_general)
1954 i = atoi_n((char *) zapt->term->u.general->buf,
1955 zapt->term->u.general->len);
1956 if (i >= sort_sequence->num_specs)
1958 sprintf(termz, "%d", i);
1960 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1961 sks->sortElement = (Z_SortElement *)
1962 nmem_malloc(stream, sizeof(*sks->sortElement));
1963 sks->sortElement->which = Z_SortElement_generic;
1964 sk = sks->sortElement->u.generic = (Z_SortKey *)
1965 nmem_malloc(stream, sizeof(*sk));
1966 sk->which = Z_SortKey_sortAttributes;
1967 sk->u.sortAttributes = (Z_SortAttributes *)
1968 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1970 sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1971 sk->u.sortAttributes->list = zapt->attributes;
1973 sks->sortRelation = (int *)
1974 nmem_malloc(stream, sizeof(*sks->sortRelation));
/* 1 = ascending, 2 = descending, anything else defaults to ascending */
1975 if (sort_relation_value == 1)
1976 *sks->sortRelation = Z_SortKeySpec_ascending;
1977 else if (sort_relation_value == 2)
1978 *sks->sortRelation = Z_SortKeySpec_descending;
1980 *sks->sortRelation = Z_SortKeySpec_ascending;
1982 sks->caseSensitivity = (int *)
1983 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1984 *sks->caseSensitivity = 0;
1986 sks->which = Z_SortKeySpec_null;
1987 sks->u.null = odr_nullval ();
1988 sort_sequence->specs[i] = sks;
/* a sort spec contributes no hits of its own */
1989 *rset = rset_create_null(rset_nmem, kc, 0);
/* rpn_check_xpath: detect an X-Path expression in the use attribute.
 *
 * Attribute type 1 of zapt is looked up with attr_find_ex, which may
 * deliver a string value in use_string. Only a string starting with a
 * slash is treated as X-Path; it is parsed by zebra_parse_xpath_str
 * into at most max steps in xpath, and that call result is returned.
 *
 * NOTE(review): the value returned when no X-Path string is present is
 * not part of this excerpt - confirm against the caller.
 */
1994 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1995 const Odr_oid *attributeSet,
1996 struct xpath_location_step *xpath, int max,
1999 const Odr_oid *curAttributeSet = attributeSet;
2001 const char *use_string = 0;
2003 attr_init_APT(&use, zapt, 1);
2004 attr_find_ex(&use, &curAttributeSet, &use_string);
2006 if (!use_string || *use_string != '/')
2009 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/* xpath_trunc: build a result set for one X-Path dictionary pattern.
 *
 * The ordinal of the index identified by index_type and xpath_use is
 * looked up in the explain database. The grep pattern written to
 * term_dict is the encoded ordinal in parentheses (each byte preceded
 * by a byte of value 1) followed by term. dict_lookup_grep collects the
 * matching ISAM positions in grep_info, and rset_trunc turns them into
 * the returned result set (rank flags void, term type character
 * string, no hits limit, no term reference id).
 *
 * A null result set is returned when grep_info_prepare fails, and on
 * one further early path whose condition is not part of this excerpt
 * (presumably an unknown ordinal - confirm).
 *
 * NOTE(review): grep_info_delete is only seen on the main path. Verify
 * that the second early return, which follows a successful
 * grep_info_prepare, does not leave grep_info unreleased.
 */
2014 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2015 const char *index_type, const char *term,
2016 const char *xpath_use,
2018 struct rset_key_control *kc)
2020 struct grep_info grep_info;
2021 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2022 zinfo_index_category_index,
2023 index_type, xpath_use);
2024 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2025 return rset_create_null(rset_nmem, kc, 0);
2028 return rset_create_null(rset_nmem, kc, 0);
2034 WRBUF term_dict = wrbuf_alloc();
2035 int ord_len = key_SU_encode(ord, ord_buf);
2036 int term_type = Z_Term_characterString;
2037 const char *flags = "void";
/* ordinal prefix of the grep pattern, then the caller supplied term */
2039 wrbuf_putc(term_dict, '(');
2040 for (i = 0; i<ord_len; i++)
2042 wrbuf_putc(term_dict, 1);
2043 wrbuf_putc(term_dict, ord_buf[i]);
2045 wrbuf_putc(term_dict, ')');
2046 wrbuf_puts(term_dict, term);
2048 grep_info.isam_p_indx = 0;
2049 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2050 &grep_info, &max_pos, 0, grep_handle);
2051 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2052 grep_info.isam_p_indx);
2053 rset = rset_trunc(zh, grep_info.isam_p_buf,
2054 grep_info.isam_p_indx, term, strlen(term),
2055 flags, 1, term_type, rset_nmem,
2056 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2057 0 /* term_ref_id_str */);
2058 grep_info_delete(&grep_info);
2059 wrbuf_destroy(term_dict);
/* rpn_search_xpath: restrict a term result set to an X-Path location.
 *
 * rset is the result set for the term, or 0 when the query is of the
 * always-matches kind (recorded in always_matches). The character map
 * of the dictionary is cleared with dict_grep_cmap before the X-Path
 * patterns are looked up.
 *
 * The steps are processed from the last level down to level 0. For each
 * level a reversed path pattern (xpath_rev) is assembled from the step
 * names: one branch emits a pattern matching any run of non-slash
 * characters, a space is emitted as byte 1 followed by a space, and the
 * pattern for the descendant case at i == 1 is dot-star. A relation
 * predicate on the level yields an attribute result set via xpath_trunc
 * on ZEBRA_XPATH_ATTR_NAME, with characters found in REGEX_CHARS
 * escaped by a backslash. Begin and end tag sets are fetched with
 * xpath_trunc and combined with the current set by rset_create_between.
 *
 * NOTE(review): several branch conditions, the uses of always_matches
 * and the return statements are not part of this excerpt.
 */
2065 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2066 NMEM stream, const char *rank_type, RSET rset,
2067 int xpath_len, struct xpath_location_step *xpath,
2070 struct rset_key_control *kc)
2073 int always_matches = rset ? 0 : 1;
2081 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2082 for (i = 0; i<xpath_len; i++)
2084 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2096 a[@attr = value]/b[@other = othervalue]
2098 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2099 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2100 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2101 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2102 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2103 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2107 dict_grep_cmap(zh->reg->dict, 0, 0);
2110 int level = xpath_len;
2113 while (--level >= 0)
2115 WRBUF xpath_rev = wrbuf_alloc();
2117 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2119 for (i = level; i >= 1; --i)
2121 const char *cp = xpath[i].part;
2127 wrbuf_puts(xpath_rev, "[^/]*");
2128 else if (*cp == ' ')
2129 wrbuf_puts(xpath_rev, "\001 ");
2131 wrbuf_putc(xpath_rev, *cp);
2133 /* wrbuf_putc does not null-terminate , but
2134 wrbuf_puts below ensures it does.. so xpath_rev
2135 is OK iff length is > 0 */
2137 wrbuf_puts(xpath_rev, "/");
2139 else if (i == 1) /* // case */
2140 wrbuf_puts(xpath_rev, ".*");
/* attribute predicate on this level: look up name, optionally =value */
2142 if (xpath[level].predicate &&
2143 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2144 xpath[level].predicate->u.relation.name[0])
2146 WRBUF wbuf = wrbuf_alloc();
2147 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2148 if (xpath[level].predicate->u.relation.value)
2150 const char *cp = xpath[level].predicate->u.relation.value;
2151 wrbuf_putc(wbuf, '=');
2155 if (strchr(REGEX_CHARS, *cp))
2156 wrbuf_putc(wbuf, '\\');
2157 wrbuf_putc(wbuf, *cp);
2161 rset_attr = xpath_trunc(
2162 zh, stream, "0", wrbuf_cstr(wbuf),
2163 ZEBRA_XPATH_ATTR_NAME,
2165 wrbuf_destroy(wbuf);
2171 wrbuf_destroy(xpath_rev);
2175 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2176 wrbuf_cstr(xpath_rev));
2177 if (wrbuf_len(xpath_rev))
2179 rset_start_tag = xpath_trunc(zh, stream, "0",
2180 wrbuf_cstr(xpath_rev),
2181 ZEBRA_XPATH_ELM_BEGIN,
2184 rset = rset_start_tag;
2187 rset_end_tag = xpath_trunc(zh, stream, "0",
2188 wrbuf_cstr(xpath_rev),
2189 ZEBRA_XPATH_ELM_END,
2192 rset = rset_create_between(rset_nmem, kc, kc->scope,
2193 rset_start_tag, rset,
2194 rset_end_tag, rset_attr);
2197 wrbuf_destroy(xpath_rev);
/* Capacity of the xpath step array declared in rpn_search_database and
   the limit it passes to rpn_check_xpath. */
2205 #define MAX_XPATH_STEPS 10
/* Forward declaration: rpn_search_APT calls rpn_search_database, which
   is defined further down in this file. */
2207 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2208 Z_AttributesPlusTerm *zapt,
2209 const Odr_oid *attributeSet, NMEM stream,
2210 Z_SortKeySpecList *sort_sequence,
2213 struct rset_key_control *kc);
/* rpn_search_APT: evaluate one APT against every selected database.
 *
 * An array of num_bases result sets is allocated from stream. For each
 * entry of basenames the database is selected with
 * zebraExplain_curDatabase; a non-zero result is reported as
 * YAZ_BIB1_DATABASE_UNAVAILABLE. Otherwise rpn_search_database fills
 * the slot for that database.
 *
 * On failure the sets created so far are deleted. On success *rset is
 * a null result set when num_bases is 0, or the OR combination built by
 * rset_create_or; the branch preceding the num_bases == 0 test is not
 * part of this excerpt (presumably the single database case - confirm).
 */
2215 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2216 const Odr_oid *attributeSet, NMEM stream,
2217 Z_SortKeySpecList *sort_sequence,
2218 int num_bases, const char **basenames,
2221 struct rset_key_control *kc)
2223 RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2224 ZEBRA_RES res = ZEBRA_OK;
2226 for (i = 0; i < num_bases; i++)
2229 if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2231 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2236 res = rpn_search_database(zh, zapt, attributeSet, stream,
2238 rset_nmem, rsets+i, kc);
2239 if (res != ZEBRA_OK)
2242 if (res != ZEBRA_OK)
2243 { /* must clean up the already created sets */
2245 rset_delete(rsets[i]);
2252 else if (num_bases == 0)
2253 *rset = rset_create_null(rset_nmem, kc, 0);
2255 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
/* rpn_search_database: evaluate one APT against the current database.
 *
 * Visible flow:
 *   - zebra_maps_attr derives index_type, search_type, rank_type,
 *     complete_flag and sort_flag from the attributes of zapt;
 *   - the term is converted to UTF-8 in termz by zapt_term_to_utf8;
 *   - a sort request is delegated to rpn_sort_spec;
 *   - rpn_check_xpath parses an X-Path use attribute into xpath; when
 *     the last step starts with an at sign the attribute cdata index is
 *     used, otherwise the element cdata index;
 *   - relation attribute (type 2) value 103, always matches, goes
 *     straight to rpn_search_xpath with no term set;
 *   - otherwise the search is dispatched on search_type: phrase,
 *     and-list, or-list, local or numeric; any other value is reported
 *     as YAZ_BIB1_UNSUPP_SEARCH;
 *   - a successful term search is finally passed through
 *     rpn_search_xpath.
 *
 * NOTE(review): the conditions guarding the sort and X-Path branches
 * and several return statements are not part of this excerpt.
 */
2261 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2262 Z_AttributesPlusTerm *zapt,
2263 const Odr_oid *attributeSet, NMEM stream,
2264 Z_SortKeySpecList *sort_sequence,
2267 struct rset_key_control *kc)
2269 ZEBRA_RES res = ZEBRA_OK;
2270 const char *index_type;
2271 char *search_type = NULL;
2272 char rank_type[128];
2275 char termz[IT_MAX_WORD+1];
2277 const char *xpath_use = 0;
2278 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2282 log_level_rpn = yaz_log_module_level("rpn");
/* derive index, search and rank settings from the term attributes */
2285 zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2286 rank_type, &complete_flag, &sort_flag);
2288 yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2289 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2290 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2291 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2293 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2297 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2298 rank_type, rset_nmem, rset, kc);
2299 /* consider if an X-Path query is used */
2300 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2301 xpath, MAX_XPATH_STEPS, stream);
2304 if (xpath[xpath_len-1].part[0] == '@')
2305 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2307 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
2314 attr_init_APT(&relation, zapt, 2);
2315 relation_value = attr_find(&relation, NULL);
2317 if (relation_value == 103) /* alwaysmatches */
2319 *rset = 0; /* signal no "term" set */
2320 return rpn_search_xpath(zh, stream, rank_type, *rset,
2321 xpath_len, xpath, rset_nmem, rset, kc);
2326 /* search using one of the various search type strategies
2327 termz is our UTF-8 search term
2328 attributeSet is top-level default attribute set
2329 stream is ODR for search
2330 reg_id is the register type
2331 complete_flag is 1 for complete subfield, 0 for incomplete
2332 xpath_use is use-attribute to be used for X-Path search, 0 for none
2334 if (!strcmp(search_type, "phrase"))
2336 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2337 index_type, complete_flag, rank_type,
2342 else if (!strcmp(search_type, "and-list"))
2344 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2345 index_type, complete_flag, rank_type,
2350 else if (!strcmp(search_type, "or-list"))
2352 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2353 index_type, complete_flag, rank_type,
2358 else if (!strcmp(search_type, "local"))
2360 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2361 rank_type, rset_nmem, rset, kc);
2363 else if (!strcmp(search_type, "numeric"))
2365 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2366 index_type, complete_flag, rank_type,
2373 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2376 if (res != ZEBRA_OK)
2380 return rpn_search_xpath(zh, stream, rank_type, *rset,
2381 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration: rpn_search_top calls rpn_search_structure, which
   is defined further down in this file and calls itself for the two
   operands of a complex node. */
2384 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2385 const Odr_oid *attributeSet,
2386 NMEM stream, NMEM rset_nmem,
2387 Z_SortKeySpecList *sort_sequence,
2388 int num_bases, const char **basenames,
2389 RSET **result_sets, int *num_result_sets,
2390 Z_Operator *parent_op,
2391 struct rset_key_control *kc);
/* rpn_get_top_approx_limit: walk an RPN tree looking for attribute
 * type 12 on its terms.
 *
 * Complex nodes are handled by recursing into s1 and then s2; the
 * second call is made only while res is still ZEBRA_OK. For a simple
 * node whose operand is an APT, attribute type 12 is looked up with
 * attr_find and its value kept in l.
 *
 * NOTE(review): what is done with l, the remaining parameters and the
 * return statement are not part of this excerpt; presumably the value
 * is handed back as the approximation limit - confirm.
 */
2393 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2396 ZEBRA_RES res = ZEBRA_OK;
2397 if (zs->which == Z_RPNStructure_complex)
2399 if (res == ZEBRA_OK)
2400 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2402 if (res == ZEBRA_OK)
2403 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2406 else if (zs->which == Z_RPNStructure_simple)
2408 if (zs->u.simple->which == Z_Operand_APT)
2410 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2411 AttrType global_hits_limit_attr;
2414 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2416 l = attr_find(&global_hits_limit_attr, NULL);
/* rpn_search_top: entry point for evaluating a whole RPN query tree.
 *
 * A key control object is created with zebra_key_control_create and the
 * tree is evaluated by rpn_search_structure with no parent operator.
 * When that fails, every result set returned so far is deleted. On
 * success exactly one non-null result set is expected (enforced with
 * assert) and it is stored in *result_set.
 *
 * NOTE(review): the release of kc and the return statements are not
 * part of this excerpt.
 */
2424 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2425 const Odr_oid *attributeSet,
2426 NMEM stream, NMEM rset_nmem,
2427 Z_SortKeySpecList *sort_sequence,
2428 int num_bases, const char **basenames,
2431 RSET *result_sets = 0;
2432 int num_result_sets = 0;
2434 struct rset_key_control *kc = zebra_key_control_create(zh);
2436 res = rpn_search_structure(zh, zs, attributeSet,
2439 num_bases, basenames,
2440 &result_sets, &num_result_sets,
2441 0 /* no parent op */,
2443 if (res != ZEBRA_OK)
2446 for (i = 0; i<num_result_sets; i++)
2447 rset_delete(result_sets[i]);
2452 assert(num_result_sets == 1);
2453 assert(result_sets);
2454 assert(*result_sets);
2455 *result_set = *result_sets;
/* rpn_search_structure: recursively evaluate one node of an RPN tree.
 *
 * Output is a list of result sets (*result_sets, *num_result_sets)
 * allocated from stream.
 *
 * Complex node:
 *   - both operands are evaluated recursively; when one fails, the sets
 *     already produced by the operands are deleted;
 *   - the two lists are concatenated into a new list;
 *   - the list is combined into a single set right away unless the
 *     parent operator is the same as this one and the operator is AND
 *     or OR, in which case the list is handed up uncombined so the
 *     parent can merge it with its own operands;
 *   - AND, OR, AND-NOT and proximity operators are handled; proximity
 *     requires a known unit equal to Z_ProxUnit_word, anything else is
 *     reported as YAZ_BIB1_UNSUPP_PROX_UNIT_CODE; other operators are
 *     reported as YAZ_BIB1_OPERATOR_UNSUPP.
 *
 * Simple node:
 *   - an APT operand is evaluated by rpn_search_APT;
 *   - a result set id is resolved with resultSetRef, a miss being
 *     reported as YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST;
 *   - any other operand is reported as YAZ_BIB1_UNSUPP_SEARCH;
 *   - the single resulting set is returned as a list of length one.
 *
 * Any other node kind is reported as YAZ_BIB1_UNSUPP_SEARCH.
 *
 * NOTE(review): the switch header, some case labels and the return
 * statements are not part of this excerpt.
 */
2461 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2462 const Odr_oid *attributeSet,
2463 NMEM stream, NMEM rset_nmem,
2464 Z_SortKeySpecList *sort_sequence,
2465 int num_bases, const char **basenames,
2466 RSET **result_sets, int *num_result_sets,
2467 Z_Operator *parent_op,
2468 struct rset_key_control *kc)
2470 *num_result_sets = 0;
2471 if (zs->which == Z_RPNStructure_complex)
2474 Z_Operator *zop = zs->u.complex->roperator;
2475 RSET *result_sets_l = 0;
2476 int num_result_sets_l = 0;
2477 RSET *result_sets_r = 0;
2478 int num_result_sets_r = 0;
/* left operand */
2480 res = rpn_search_structure(zh, zs->u.complex->s1,
2481 attributeSet, stream, rset_nmem,
2483 num_bases, basenames,
2484 &result_sets_l, &num_result_sets_l,
2486 if (res != ZEBRA_OK)
2489 for (i = 0; i<num_result_sets_l; i++)
2490 rset_delete(result_sets_l[i]);
/* right operand */
2493 res = rpn_search_structure(zh, zs->u.complex->s2,
2494 attributeSet, stream, rset_nmem,
2496 num_bases, basenames,
2497 &result_sets_r, &num_result_sets_r,
2499 if (res != ZEBRA_OK)
2502 for (i = 0; i<num_result_sets_l; i++)
2503 rset_delete(result_sets_l[i]);
2504 for (i = 0; i<num_result_sets_r; i++)
2505 rset_delete(result_sets_r[i]);
2509 /* make a new list of result for all children */
2510 *num_result_sets = num_result_sets_l + num_result_sets_r;
2511 *result_sets = nmem_malloc(stream, *num_result_sets *
2512 sizeof(**result_sets));
2513 memcpy(*result_sets, result_sets_l,
2514 num_result_sets_l * sizeof(**result_sets));
2515 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2516 num_result_sets_r * sizeof(**result_sets));
2518 if (!parent_op || parent_op->which != zop->which
2519 || (zop->which != Z_Operator_and &&
2520 zop->which != Z_Operator_or))
2522 /* parent node different from this one (or non-present) */
2523 /* we must combine result sets now */
2527 case Z_Operator_and:
2528 rset = rset_create_and(rset_nmem, kc,
2530 *num_result_sets, *result_sets);
2533 rset = rset_create_or(rset_nmem, kc,
2534 kc->scope, 0, /* termid */
2535 *num_result_sets, *result_sets);
2537 case Z_Operator_and_not:
2538 rset = rset_create_not(rset_nmem, kc,
2543 case Z_Operator_prox:
2544 if (zop->u.prox->which != Z_ProximityOperator_known)
2547 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2551 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2553 zebra_setError_zint(zh,
2554 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2555 *zop->u.prox->u.known);
2560 rset = rset_create_prox(rset_nmem, kc,
2562 *num_result_sets, *result_sets,
2563 *zop->u.prox->ordered,
2564 (!zop->u.prox->exclusion ?
2565 0 : *zop->u.prox->exclusion),
2566 *zop->u.prox->relationType,
2567 *zop->u.prox->distance );
2571 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
2574 *num_result_sets = 1;
2575 *result_sets = nmem_malloc(stream, *num_result_sets *
2576 sizeof(**result_sets));
2577 (*result_sets)[0] = rset;
2580 else if (zs->which == Z_RPNStructure_simple)
2585 if (zs->u.simple->which == Z_Operand_APT)
2587 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2588 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2589 attributeSet, stream, sort_sequence,
2590 num_bases, basenames, rset_nmem, &rset,
2592 if (res != ZEBRA_OK)
2595 else if (zs->u.simple->which == Z_Operand_resultSetId)
2597 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2598 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2602 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2603 zs->u.simple->u.resultSetId);
2610 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2613 *num_result_sets = 1;
2614 *result_sets = nmem_malloc(stream, *num_result_sets *
2615 sizeof(**result_sets));
2616 (*result_sets)[0] = rset;
2620 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2631 * indent-tabs-mode: nil
2633 * vim: shiftwidth=4 tabstop=8 expandtab