1 /* $Id: rpnsearch.c,v 1.8 2007-01-17 12:59:38 adam Exp $
2 Copyright (C) 1995-2007
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
/* File-scope logging state and a feature switch.
 * log_level_rpn is assigned from yaz_log_module_level("rpn") in add_isam_p
 * and is passed to yaz_log() throughout this file.
 * NOTE(review): the use of log_level_set and TERMSET_DISABLE is not visible
 * in this view; presumably a "level already looked up" flag and a switch for
 * the termset attribute handling in grep_info_prepare -- confirm. */
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
43 #define TERMSET_DISABLE 1
/* Character-map callback; rpn_char_map_prepare registers it with
 * dict_grep_cmap().
 *   vp   - the struct rpn_char_map_info given at registration
 *   from - input position, handed to zebra_maps_input()
 *   len  - number of input bytes available
 * The input is mapped with zebra_maps_input() for the register type stored
 * in the map info; the yaz_log() calls dump the mapped bytes in hex.
 * NOTE(review): the return statement is not visible here; presumably the
 * mapped `out` is returned -- confirm. */
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
47 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48 const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
52 const char *outp = *out;
53 yaz_log(YLOG_LOG, "---");
56 yaz_log(YLOG_LOG, "%02X", *outp);
/* Fill in map_info (zebra maps handle and register type) and install
 * rpn_char_map_handler as the character-map callback of reg->dict.
 *   reg      - register supplying both the zebra maps and the dictionary
 *   reg_type - register type stored for later use by the handler
 *   map_info - caller-owned storage; dict_grep_cmap() is given this pointer,
 *              so it has to stay valid while the callback is installed */
64 void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
65 struct rpn_char_map_info *map_info)
67 map_info->zm = reg->zebra_maps;
68 map_info->reg_type = reg_type;
69 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
/* Record one dictionary hit in the grep_info accumulator `p` (a
 * struct grep_info *, see the cast in grep_handle).
 *   name - dictionary key of the hit; key_SU_decode() below splits it into
 *          an ordinal and the term bytes that follow
 *   info - length-prefixed value: info[0] must equal sizeof(ISAM_P) (see the
 *          assert) and the ISAM pointer is copied from info+1
 * NOTE(review): the return values are not visible in this view. */
87 static int add_isam_p(const char *name, const char *info,
92 log_level_rpn = yaz_log_module_level("rpn");
95 /* we may have to stop this madness.. NOTE: -1 so that if
96 truncmax == trunxlimit we do *not* generate result sets */
97 if (p->isam_p_indx >= p->trunc_max - 1)
/* buffer full: grow capacity to 2*size + 100 and carry the old entries over */
100 if (p->isam_p_indx == p->isam_p_size)
102 ISAM_P *new_isam_p_buf;
106 p->isam_p_size = 2*p->isam_p_size + 100;
107 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
111 memcpy(new_isam_p_buf, p->isam_p_buf,
112 p->isam_p_indx * sizeof(*p->isam_p_buf));
113 xfree(p->isam_p_buf);
115 p->isam_p_buf = new_isam_p_buf;
118 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* NOTE(review): the copy below reads from p->isam_p_buf (which already
   points at the new ISAM_P buffer) while sizing by *p->term_no; it looks
   like the source was meant to be p->term_no -- confirm. */
121 memcpy(new_term_no, p->isam_p_buf,
122 p->isam_p_indx * sizeof(*p->term_no));
125 p->term_no = new_term_no;
/* store the ISAM pointer carried in the length-prefixed info record */
128 assert(*info == sizeof(*p->isam_p_buf));
129 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
/* decode the key (ordinal + term) for logging and for resultSetAddTerm();
   NOTE(review): the condition guarding this part is not visible here */
134 char term_tmp[IT_MAX_WORD];
136 const char *index_name;
137 int len = key_SU_decode (&ord, (const unsigned char *) name);
139 zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len);
140 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
141 zebraExplain_lookup_ord(p->zh->reg->zei,
142 ord, 0 /* index_type */, &db, &index_name);
143 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
145 resultSetAddTerm(p->zh, p->termset, name[len], db,
146 index_name, term_tmp);
/* Callback adapter: casts the opaque client pointer `p` to
 * struct grep_info * and forwards the hit (name, info) to add_isam_p(),
 * returning its result unchanged. */
152 static int grep_handle(char *name, const char *info, void *p)
154 return add_isam_p(name, info, (struct grep_info *) p);
/* Pre-scan of a term before the term_* converters run: skips leading white
 * space at *src.
 *   ct1, ct2 - optional (may be NULL) sets of characters tested with
 *              strchr() against the current input byte
 *   first    - passed through to zebra_maps_input()
 * A mapped character counts as white space when it equals *CHR_SPACE.
 * NOTE(review): what happens on a ct1/ct2 match, how *src is updated and the
 * return value are not visible here; callers treat 0 as "no term" (they test
 * `!term_pre(...)`). */
157 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
158 const char *ct1, const char *ct2, int first)
160 const char *s1, *s0 = *src;
163 /* skip white space */
166 if (ct1 && strchr(ct1, *s0))
168 if (ct2 && strchr(ct2, *s0))
171 map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
172 if (**map != *CHR_SPACE)
/* Produce a printable dump of in_buf (in_size bytes) in out_buf for logging.
 * Each byte is appended as "%02X:%c " (hex value, then a character).
 *   out_size - capacity of out_buf; must exceed 20 (asserted)
 * Once out_buf holds more than out_size-20 characters, ".." is appended.
 * NOTE(review): the substitution applied to `pc` for bytes outside 32..126
 * and the exit from the loop after ".." are not visible here -- confirm. */
181 static void esc_str(char *out_buf, size_t out_size,
182 const char *in_buf, int in_size)
188 assert(out_size > 20);
190 for (k = 0; k<in_size; k++)
192 int c = in_buf[k] & 0xff;
194 if (c < 32 || c > 126)
198 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
199 if (strlen(out_buf) > out_size-20)
201 strcat(out_buf, "..");
/* Characters the term_* converters test for with strchr() before copying a
 * mapped character into the dictionary pattern.
 * NOTE(review): presumably these are the characters that must be escaped in
 * the generated regular expression -- confirm. */
207 #define REGEX_CHARS " []()|.*+?!"
209 /* term_100: handle term, where trunc = none(no operators at all) */
/* Converts the next term at *src into dictionary-pattern form.
 *   dst         - receives the mapped characters (strcpy of map[0])
 *   dst_term    - receives the raw source bytes of the term (memcpy from the
 *                 input between s1 and s0)
 *   space_split - non-zero: a mapped space is tested right after mapping;
 *                 zero ("complete subfield"): runs of spaces are remembered
 *                 in space_start/space_end and re-emitted before the next
 *                 non-space character
 * NOTE(review): the last parameter, the loop construct, the handling of a
 * REGEX_CHARS match and the return value are not visible here; callers treat
 * 0 as "no term found". */
210 static int term_100(ZebraMaps zebra_maps, int reg_type,
211 const char **src, char *dst, int space_split,
219 const char *space_start = 0;
220 const char *space_end = 0;
222 if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
229 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
233 if (**map == *CHR_SPACE)
236 else /* complete subfield only. */
238 if (**map == *CHR_SPACE)
239 { /* save space mapping for later .. */
244 else if (space_start)
245 { /* reload last space */
246 while (space_start < space_end)
248 if (strchr(REGEX_CHARS, *space_start))
250 dst_term[j++] = *space_start;
251 dst[i++] = *space_start++;
254 space_start = space_end = 0;
257 /* add non-space char */
258 memcpy(dst_term+j, s1, s0 - s1);
264 if (strchr(REGEX_CHARS, *s1))
/* printable dump of the mapped character, for logging only */
272 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
274 strcpy(dst + i, map[0]);
284 /* term_101: handle term, where trunc = Process # */
/* Same conversion scheme as term_100, with '#' registered as a special
 * character in the term_pre() call.  Raw source bytes go to dst_term (NUL
 * terminated at the end), mapped characters go to dst.  With space_split
 * set, a mapped space is tested right after mapping.
 * NOTE(review): the pattern text emitted for '#' and the return value are
 * not visible here -- confirm. */
285 static int term_101(ZebraMaps zebra_maps, int reg_type,
286 const char **src, char *dst, int space_split,
294 if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
303 dst_term[j++] = *s0++;
309 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
311 if (space_split && **map == *CHR_SPACE)
314 /* add non-space char */
315 memcpy(dst_term+j, s1, s0 - s1);
321 if (strchr(REGEX_CHARS, *s1))
329 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
331 strcpy(dst + i, map[0]);
337 dst_term[j++] = '\0';
342 /* term_103: handle term, where trunc = re-2 (regular expressions) */
/* Converts a regular-expression term.  The characters ^\()[].*+?|- are
 * recognised as operators by the strchr() test below; all other characters
 * are mapped through zebra_maps_search() as in term_100.
 *   errors - optional (may be NULL, as passed by term_102).  When non-NULL
 *            and the term starts with '+', a digit, '+' and at least one
 *            more character, the digit's value is stored in *errors.
 * NOTE(review): how far s0 is advanced after the error prefix and the return
 * value are not visible here -- confirm. */
343 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
344 char *dst, int *errors, int space_split,
352 if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
355 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
356 isdigit(((const unsigned char *)s0)[1]))
358 *errors = s0[1] - '0';
365 if (strchr("^\\()[].*+?|-", *s0))
374 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
376 if (space_split && **map == *CHR_SPACE)
379 /* add non-space char */
380 memcpy(dst_term+j, s1, s0 - s1);
386 if (strchr(REGEX_CHARS, *s1))
394 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
396 strcpy(dst + i, map[0]);
408 /* term_102: handle term, where trunc = re-1 (regular expressions) */
/* Thin wrapper: delegates to term_103() with errors == NULL, so no error
 * prefix is parsed. */
409 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
410 char *dst, int space_split, char *dst_term)
412 return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
417 /* term_104: handle term, where trunc = Process # and ! */
/* Same conversion scheme as term_100.  The term_pre() call below registers
 * '?', '*' and '#' as special characters.  One of the specials may be
 * followed by a decimal number, which is accumulated into `limit` while the
 * digits are also copied to dst_term.
 * NOTE(review): the header comment mentions '!' but the visible code passes
 * "?*#"; which operator takes the numeric limit and what pattern text is
 * emitted are not visible here -- confirm. */
418 static int term_104(ZebraMaps zebra_maps, int reg_type,
419 const char **src, char *dst, int space_split,
427 if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
434 dst_term[j++] = *s0++;
435 if (*s0 >= '0' && *s0 <= '9')
438 while (*s0 >= '0' && *s0 <= '9')
440 limit = limit * 10 + (*s0 - '0');
441 dst_term[j++] = *s0++;
461 dst_term[j++] = *s0++;
466 dst_term[j++] = *s0++;
472 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
474 if (space_split && **map == *CHR_SPACE)
477 /* add non-space char */
478 memcpy(dst_term+j, s1, s0 - s1);
484 if (strchr(REGEX_CHARS, *s1))
492 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
494 strcpy(dst + i, map[0]);
500 dst_term[j++] = '\0';
505 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
/* Same conversion scheme as term_100, with '*' and '!' registered as special
 * characters in the term_pre() call.
 *   right_truncate - string_term passes 1 for truncation 105 and 0 for 106.
 * NOTE(review): the pattern text emitted for '*' and '!' and the effect of
 * right_truncate are not visible here -- confirm. */
506 static int term_105(ZebraMaps zebra_maps, int reg_type,
507 const char **src, char *dst, int space_split,
508 char *dst_term, int right_truncate)
515 if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
524 dst_term[j++] = *s0++;
529 dst_term[j++] = *s0++;
535 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
537 if (space_split && **map == *CHR_SPACE)
540 /* add non-space char */
541 memcpy(dst_term+j, s1, s0 - s1);
547 if (strchr(REGEX_CHARS, *s1))
555 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
557 strcpy(dst + i, map[0]);
569 dst_term[j++] = '\0';
575 /* gen_regular_rel - generate regular expression from relation
576 * val: border value (inclusive)
577 * islt: 1 if <=; 0 if >=.
/* Writes into dst a regular expression matching decimal integers on one side
 * of val (see the header comment for val/islt).  The pattern starts with one
 * of two fixed prefixes, "(-[0-9]+|(" or "([0-9]+|-(", continues with
 * per-digit alternatives derived from the decimal text of val, and ends with
 * a tail that admits shorter numbers ("[0-9]?" repeated) or longer ones
 * ("[0-9]" repeated, then "[0-9]*").
 * NOTE(review): dst must be large enough for the whole pattern; no bound is
 * checked in the visible code.  The conditions selecting each prefix and
 * tail are not visible here. */
579 static void gen_regular_rel(char *dst, int val, int islt)
586 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
590 strcpy(dst, "(-[0-9]+|(");
598 strcpy(dst, "([0-9]+|-(");
610 sprintf(numstr, "%d", val);
611 for (w = strlen(numstr); --w >= 0; pos++)
630 strcpy(dst + dst_p, numstr);
631 dst_p = strlen(dst) - pos - 1;
659 for (i = 0; i<pos; i++)
672 /* match everything less than 10^(pos-1) */
674 for (i = 1; i<pos; i++)
675 strcat(dst, "[0-9]?");
679 /* match everything greater than 10^pos */
680 for (i = 0; i <= pos; i++)
681 strcat(dst, "[0-9]");
682 strcat(dst, "[0-9]*");
/* Copy one logical character from src[*indx] to *term_p, advancing both the
 * output pointer and the index.  If the character is a backslash, the
 * backslash and the character after it are copied together, so an escape
 * sequence is never split.
 *   term_p - in/out: write position in the destination
 *   src    - source pattern text
 *   indx   - in/out: read index into src */
687 void string_rel_add_char(char **term_p, const char *src, int *indx)
689 if (src[*indx] == '\\')
690 *(*term_p)++ = src[(*indx)++];
691 *(*term_p)++ = src[(*indx)++];
695 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
696 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
697 * >= abc ([b-].*|a[c-].*|ab[c-].*)
698 * ([^-a].*|a[^-b].*|ab[c-].*)
699 * < abc ([-0].*|a[-a].*|ab[-b].*)
700 * ([^a-].*|a[^b-].*|ab[^c-].*)
701 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
702 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* Appends to term_dict a regular expression implementing the relation
 * attribute (attribute type 2) of zapt for the next term at *term_sub.
 * The term is first converted with term_100() into term_component.  For the
 * ordering relations (<, <=, >, >=) a pattern is then assembled character by
 * character with string_rel_add_char(); for "=" the component is wrapped in
 * "(" ... ")".  For the "always matches" relation the rest of the term is
 * skipped, and an unsupported relation stores
 * YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE in *error_code.
 * Each ordering branch tests whether the text written past term_dict exceeds
 * IT_MAX_WORD.
 * NOTE(review): the case labels, the action taken on that overflow test and
 * the return values are not visible here; string_term treats 0 as failure. */
704 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
705 const char **term_sub, char *term_dict,
706 oid_value attributeSet,
707 int reg_type, int space_split, char *term_dst,
/* output is appended after whatever term_dict already contains */
713 char *term_tmp = term_dict + strlen(term_dict);
714 char term_component[2*IT_MAX_WORD+20];
716 attr_init_APT(&relation, zapt, 2);
717 relation_value = attr_find(&relation, NULL);
720 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
721 switch (relation_value)
724 if (!term_100(zh->reg->zebra_maps, reg_type,
725 term_sub, term_component,
726 space_split, term_dst))
728 yaz_log(log_level_rpn, "Relation <");
731 for (i = 0; term_component[i]; )
738 string_rel_add_char(&term_tmp, term_component, &j);
745 *term_tmp++ = FIRST_IN_FIELD_CHAR;
747 string_rel_add_char(&term_tmp, term_component, &i);
754 if ((term_tmp - term_dict) > IT_MAX_WORD)
/* NOTE(review): logged at YLOG_LOG rather than log_level_rpn, unlike the
   other messages in this function -- possibly leftover debugging */
759 yaz_log(YLOG_LOG, "term_dict=%s", term_dict);
762 if (!term_100(zh->reg->zebra_maps, reg_type,
763 term_sub, term_component,
764 space_split, term_dst))
766 yaz_log(log_level_rpn, "Relation <=");
769 for (i = 0; term_component[i]; )
774 string_rel_add_char(&term_tmp, term_component, &j);
780 *term_tmp++ = FIRST_IN_FIELD_CHAR;
782 string_rel_add_char(&term_tmp, term_component, &i);
791 if ((term_tmp - term_dict) > IT_MAX_WORD)
794 for (i = 0; term_component[i]; )
795 string_rel_add_char(&term_tmp, term_component, &i);
800 if (!term_100 (zh->reg->zebra_maps, reg_type,
801 term_sub, term_component, space_split, term_dst))
803 yaz_log(log_level_rpn, "Relation >");
806 for (i = 0; term_component[i];)
811 string_rel_add_char(&term_tmp, term_component, &j);
816 string_rel_add_char(&term_tmp, term_component, &i);
824 if ((term_tmp - term_dict) > IT_MAX_WORD)
827 for (i = 0; term_component[i];)
828 string_rel_add_char(&term_tmp, term_component, &i);
835 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
836 term_component, space_split, term_dst))
838 yaz_log(log_level_rpn, "Relation >=");
841 for (i = 0; term_component[i];)
848 string_rel_add_char(&term_tmp, term_component, &j);
851 if (term_component[i+1])
855 string_rel_add_char(&term_tmp, term_component, &i);
859 string_rel_add_char(&term_tmp, term_component, &i);
866 if ((term_tmp - term_dict) > IT_MAX_WORD)
877 yaz_log(log_level_rpn, "Relation =");
878 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
879 term_component, space_split, term_dst))
881 strcat(term_tmp, "(");
882 strcat(term_tmp, term_component);
883 strcat(term_tmp, ")");
886 yaz_log(log_level_rpn, "Relation always matches");
887 /* skip to end of term (we don't care what it is) */
888 while (**term_sub != '\0')
892 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
/* Forward declaration: string_term() is defined further down in this file
 * and is called by term_trunc(), which precedes the definition. */
898 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
899 const char **term_sub,
900 oid_value attributeSet, NMEM stream,
901 struct grep_info *grep_info,
902 int reg_type, int complete_flag,
903 int num_bases, char **basenames,
905 const char *xpath_use,
906 struct ord_list **ol);
/* Extract the per-term hit limit (attribute type 11) and term reference id
 * (attribute type 10) from zapt.
 *   hits_limit_value - out: effective limit after the defaults below
 *   term_ref_id_str  - out: reference id as a string; a numeric id (>= 0) is
 *                      formatted into a 20-byte buffer taken from `nmem`
 * Defaulting rules:
 *   limit absent (-1) and a reference id present -> zh->approx_limit
 *   limit absent (-1) and no reference id        -> 0 (no counting)
 *   limit == 0                                   -> zh->approx_limit
 * NOTE(review): the `nmem` parameter is declared on a line not visible here,
 * as is the return statement. */
908 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
909 Z_AttributesPlusTerm *zapt,
910 zint *hits_limit_value,
911 const char **term_ref_id_str,
914 AttrType term_ref_id_attr;
915 AttrType hits_limit_attr;
918 attr_init_APT(&hits_limit_attr, zapt, 11);
919 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
921 attr_init_APT(&term_ref_id_attr, zapt, 10);
922 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
923 if (term_ref_id_int >= 0)
925 char *res = nmem_malloc(nmem, 20);
926 sprintf(res, "%d", term_ref_id_int);
927 *term_ref_id_str = res;
930 /* no limit given ? */
931 if (*hits_limit_value == -1)
933 if (*term_ref_id_str)
935 /* use global if term_ref is present */
936 *hits_limit_value = zh->approx_limit;
940 /* no counting if term_ref is not present */
941 *hits_limit_value = 0;
944 else if (*hits_limit_value == 0)
946 /* 0 is the same as global limit */
947 *hits_limit_value = zh->approx_limit;
949 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
950 *term_ref_id_str ? *term_ref_id_str : "none",
/* Build the result set for one term of the query.
 * Steps visible below: read the hit limit / term reference with
 * term_limits_APT(), reset the grep_info hit index, expand the term against
 * the dictionary with string_term() (which advances *term_sub), then turn
 * the collected ISAM pointers into a result set with rset_trunc(), stored in
 * *rset.  Positions are preserved (argument 1 to rset_trunc).
 * NOTE(review): several parameters (term_dst, rset_nmem, rset) are declared
 * on lines not visible here, as are the actions taken when string_term()
 * fails or *term_sub becomes NULL. */
955 static ZEBRA_RES term_trunc(ZebraHandle zh,
956 Z_AttributesPlusTerm *zapt,
957 const char **term_sub,
958 oid_value attributeSet, NMEM stream,
959 struct grep_info *grep_info,
960 int reg_type, int complete_flag,
961 int num_bases, char **basenames,
963 const char *rank_type,
964 const char *xpath_use,
967 struct rset_key_control *kc)
971 zint hits_limit_value;
972 const char *term_ref_id_str = 0;
975 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
976 grep_info->isam_p_indx = 0;
977 res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
978 reg_type, complete_flag, num_bases, basenames,
979 term_dst, xpath_use, &ol);
982 if (!*term_sub) /* no more terms ? */
984 yaz_log(log_level_rpn, "term: %s", term_dst);
985 *rset = rset_trunc(zh, grep_info->isam_p_buf,
986 grep_info->isam_p_indx, term_dst,
987 strlen(term_dst), rank_type, 1 /* preserve pos */,
988 zapt->term->which, rset_nmem,
989 kc, kc->scope, ol, reg_type, hits_limit_value,
/* Expand one string term against the dictionary for every database in
 * basenames[0..num_bases-1], collecting the matching ISAM pointers in
 * grep_info.
 * For each database:
 *   1. select it with zebraExplain_curDatabase() (failure sets
 *      YAZ_BIB1_DATABASE_UNAVAILABLE);
 *   2. resolve the index ordinal with zebra_apt_get_ord() and append it to
 *      *ol;
 *   3. start term_dict with "(" + the encoded ordinal, each byte preceded by
 *      the escape byte 1, + ")";
 *   4. append the term pattern selected by the truncation attribute
 *      (attribute type 5), via string_relation() or one of term_100..105;
 *   5. run dict_lookup_grep() on the resulting pattern.
 *   term_sub      - in/out: start of the term; every database restarts from
 *                   *term_sub (termp is reset per iteration)
 *   complete_flag - non-zero disables splitting on spaces
 *   ol            - out: list of ordinals searched, allocated from stream
 * NOTE(review): `j`, `term_dst` and the failure handling of each branch are
 * on lines not visible here. */
996 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
997 const char **term_sub,
998 oid_value attributeSet, NMEM stream,
999 struct grep_info *grep_info,
1000 int reg_type, int complete_flag,
1001 int num_bases, char **basenames,
1003 const char *xpath_use,
1004 struct ord_list **ol)
1006 char term_dict[2*IT_MAX_WORD+4000];
1008 AttrType truncation;
1009 int truncation_value;
1011 struct rpn_char_map_info rcmi;
1012 int space_split = complete_flag ? 0 : 1;
1014 int bases_ok = 0; /* no of databases with OK attribute */
1016 *ol = ord_list_create(stream);
1018 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1019 attr_init_APT(&truncation, zapt, 5);
1020 truncation_value = attr_find(&truncation, NULL);
1021 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1023 for (base_no = 0; base_no < num_bases; base_no++)
1026 int regex_range = 0;
1027 int max_pos, prefix_len = 0;
1032 termp = *term_sub; /* start of term for each database */
1034 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1036 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1037 basenames[base_no]);
1041 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1042 attributeSet, &ord) != ZEBRA_OK)
1047 *ol = ord_list_append(stream, *ol, ord);
1048 ord_len = key_SU_encode (ord, ord_buf);
/* pattern prefix: "(" <escaped ordinal bytes> ")" */
1050 term_dict[prefix_len++] = '(';
1051 for (i = 0; i<ord_len; i++)
1053 term_dict[prefix_len++] = 1; /* our internal regexp escape char */
1054 term_dict[prefix_len++] = ord_buf[i];
1056 term_dict[prefix_len++] = ')';
1057 term_dict[prefix_len] = '\0';
1059 switch (truncation_value)
1061 case -1: /* not specified */
1062 case 100: /* do not truncate */
1063 if (!string_relation(zh, zapt, &termp, term_dict,
1065 reg_type, space_split, term_dst,
1070 zebra_setError(zh, relation_error, 0);
1077 case 1: /* right truncation */
1078 term_dict[j++] = '(';
1079 if (!term_100(zh->reg->zebra_maps, reg_type,
1080 &termp, term_dict + j, space_split, term_dst))
1085 strcat(term_dict, ".*)");
1087 case 2: /* left truncation */
1088 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1089 if (!term_100(zh->reg->zebra_maps, reg_type,
1090 &termp, term_dict + j, space_split, term_dst))
1095 strcat(term_dict, ")");
1097 case 3: /* left&right truncation */
1098 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1099 if (!term_100(zh->reg->zebra_maps, reg_type,
1100 &termp, term_dict + j, space_split, term_dst))
1105 strcat(term_dict, ".*)");
1107 case 101: /* process # in term */
1108 term_dict[j++] = '(';
1109 if (!term_101(zh->reg->zebra_maps, reg_type,
1110 &termp, term_dict + j, space_split, term_dst))
1115 strcat(term_dict, ")");
1117 case 102: /* Regexp-1 */
1118 term_dict[j++] = '(';
1119 if (!term_102(zh->reg->zebra_maps, reg_type,
1120 &termp, term_dict + j, space_split, term_dst))
1125 strcat(term_dict, ")");
1127 case 103: /* Regexp-2 */
1129 term_dict[j++] = '(';
/* NOTE(review): the fifth argument of the call below appears mis-encoded in
   this copy of the file; term_103() takes an int *errors there and
   regex_range is the local later passed to dict_lookup_grep(), so it is
   presumably the address of regex_range -- confirm and repair. */
1130 if (!term_103(zh->reg->zebra_maps, reg_type,
1131 &termp, term_dict + j, ®ex_range,
1132 space_split, term_dst))
1137 strcat(term_dict, ")");
1139 case 104: /* process # and ! in term */
1140 term_dict[j++] = '(';
1141 if (!term_104(zh->reg->zebra_maps, reg_type,
1142 &termp, term_dict + j, space_split, term_dst))
1147 strcat(term_dict, ")");
1149 case 105: /* process * and ! in term */
1150 term_dict[j++] = '(';
1151 if (!term_105(zh->reg->zebra_maps, reg_type,
1152 &termp, term_dict + j, space_split, term_dst, 1))
1157 strcat(term_dict, ")");
1159 case 106: /* process * and ! in term */
1160 term_dict[j++] = '(';
1161 if (!term_105(zh->reg->zebra_maps, reg_type,
1162 &termp, term_dict + j, space_split, term_dst, 0))
1167 strcat(term_dict, ")");
1170 zebra_setError_zint(zh,
1171 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
/* printable dump of the pattern (without the ordinal prefix), for logging */
1178 const char *input = term_dict + prefix_len;
1179 esc_str(buf, sizeof(buf), input, strlen(input));
1181 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1182 r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1183 grep_info, &max_pos,
1184 ord_len /* number of "exact" chars */,
1187 zebra_set_partial_result(zh);
1189 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1194 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Release the buffers owned by grep_info (term_no and isam_p_buf).  The
 * structure itself is not freed; callers in this file keep it on the
 * stack. */
1200 static void grep_info_delete(struct grep_info *grep_info)
1203 xfree(grep_info->term_no);
1205 xfree(grep_info->isam_p_buf);
/* Initialise grep_info for a new search leaf.
 *   - buffers start empty (isam_p_buf NULL, isam_p_size 0, term_no 0);
 *   - trunc_max comes from the "truncmax" resource (default "10000") and is
 *     overridden by attribute type 13 of zapt when that attribute is
 *     present (value != -1);
 *   - attribute type 8 (termset) is examined: a name is derived either from
 *     the numeric value, formatted with "%d", or from the string value, and
 *     a result set of that name is created with resultSetAdd().  Failure to
 *     create it sets YAZ_BIB1_ILLEGAL_RESULT_SET_NAME.
 * Callers compare the return value against ZEBRA_FAIL.
 * NOTE(review): the reg_type parameter declaration, the branch that sets
 * YAZ_BIB1_UNSUPP_SEARCH "termset" and the return statements are on lines
 * not visible here. */
1208 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1209 Z_AttributesPlusTerm *zapt,
1210 struct grep_info *grep_info,
1214 grep_info->term_no = 0;
1216 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1217 grep_info->isam_p_size = 0;
1218 grep_info->isam_p_buf = NULL;
1220 grep_info->reg_type = reg_type;
1221 grep_info->termset = 0;
1227 attr_init_APT(&truncmax, zapt, 13);
1228 truncmax_value = attr_find(&truncmax, NULL);
1229 if (truncmax_value != -1)
1230 grep_info->trunc_max = truncmax_value;
1235 int termset_value_numeric;
1236 const char *termset_value_string;
1238 attr_init_APT(&termset, zapt, 8);
1239 termset_value_numeric =
1240 attr_find_ex(&termset, NULL, &termset_value_string);
1241 if (termset_value_numeric != -1)
1244 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1248 const char *termset_name = 0;
1249 if (termset_value_numeric != -2)
1252 sprintf(resname, "%d", termset_value_numeric);
1253 termset_name = resname;
1256 termset_name = termset_value_string;
1257 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1258 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1259 if (!grep_info->termset)
1261 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1271 \brief Create result set(s) for list of terms
1272 \param zh Zebra Handle
1273 \param zapt Attributes Plus Term (RPN leaf)
1274 \param termz term as used in query but converted to UTF-8
1275 \param attributeSet default attribute set
1276 \param stream memory for result
1277 \param reg_type register type ('w', 'p',..)
1278 \param complete_flag whether it's phrases or not
1279 \param rank_type term flags for ranking
1280 \param xpath_use use attribute for X-Path (-1 for no X-path)
1281 \param num_bases number of databases
1282 \param basenames array of databases
1283 \param rset_nmem memory for result sets
1284 \param result_sets output result set for each term in list (output)
1285 \param num_result_sets number of output result sets
1286 \param kc rset key control to be used for created result sets
/* Split termz into its terms and create one result set per term by calling
 * term_trunc() repeatedly.
 * The *result_sets array is allocated from `stream` and enlarged by `add`
 * entries whenever alloc_sets == *num_result_sets, with the old entries
 * copied over.  If term_trunc() fails, every result set created so far is
 * deleted and grep_info is released.  grep_info is also released on the
 * normal path.
 * NOTE(review): the termz, stream and rset_nmem parameters, the loop
 * construct and the return statements are on lines not visible here. */
1288 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1289 Z_AttributesPlusTerm *zapt,
1291 oid_value attributeSet,
1293 int reg_type, int complete_flag,
1294 const char *rank_type,
1295 const char *xpath_use,
1296 int num_bases, char **basenames,
1298 RSET **result_sets, int *num_result_sets,
1299 struct rset_key_control *kc)
1301 char term_dst[IT_MAX_WORD+1];
1302 struct grep_info grep_info;
1303 const char *termp = termz;
1306 *num_result_sets = 0;
1308 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
/* grow the output array when it is full */
1314 if (alloc_sets == *num_result_sets)
1317 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1320 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1321 alloc_sets = alloc_sets + add;
1322 *result_sets = rnew;
1324 res = term_trunc(zh, zapt, &termp, attributeSet,
1326 reg_type, complete_flag,
1327 num_bases, basenames,
1328 term_dst, rank_type,
1329 xpath_use, rset_nmem,
1330 &(*result_sets)[*num_result_sets],
/* failure: undo everything created so far */
1332 if (res != ZEBRA_OK)
1335 for (i = 0; i < *num_result_sets; i++)
1336 rset_delete((*result_sets)[i]);
1337 grep_info_delete (&grep_info);
1340 if ((*result_sets)[*num_result_sets] == 0)
1342 (*num_result_sets)++;
1347 grep_info_delete(&grep_info);
/* Handle the position attribute (attribute type 3) of zapt.
 * Three conditions report YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE: one inside the
 * switch on the attribute value, one when the register type has no
 * first-in-field support (zebra_maps_is_first_in_field), and one when
 * neither the isamb nor the isamc backend is available.
 * Otherwise, for each database the key <encoded ordinal> +
 * FIRST_IN_FIELD_STR is looked up in the dictionary; each hit yields an
 * isamb or isamc result set, and the sets are combined with
 * rset_create_or() into *rset.
 * NOTE(review): the case labels, the reg_type/rset_nmem/rset parameters and
 * the release of f_set (allocated with xmalloc) are on lines not visible
 * here -- confirm f_set is freed on every path. */
1351 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1352 Z_AttributesPlusTerm *zapt,
1353 oid_value attributeSet,
1355 int num_bases, char **basenames,
1358 struct rset_key_control *kc)
1366 attr_init_APT(&position, zapt, 3);
1367 position_value = attr_find(&position, NULL);
1368 switch(position_value)
1377 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1382 if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, reg_type))
1384 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1389 if (!zh->reg->isamb && !zh->reg->isamc)
1391 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1395 f_set = xmalloc(sizeof(RSET) * num_bases);
1396 for (base_no = 0; base_no < num_bases; base_no++)
1400 char term_dict[100];
1405 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1407 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1408 basenames[base_no]);
1412 if (zebra_apt_get_ord(zh, zapt, reg_type, 0,
1413 attributeSet, &ord) != ZEBRA_OK)
/* dictionary key: encoded ordinal followed by the first-in-field marker */
1416 ord_len = key_SU_encode (ord, ord_buf);
1417 memcpy(term_dict, ord_buf, ord_len);
1418 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1419 val = dict_lookup(zh->reg->dict, term_dict);
/* value is length-prefixed: val[0] == sizeof(ISAM_P), pointer at val+1 */
1422 assert(*val == sizeof(ISAM_P));
1423 memcpy(&isam_p, val+1, sizeof(isam_p));
1427 f_set[num_sets++] = rsisamb_create(rset_nmem, kc, kc->scope,
1428 zh->reg->isamb, isam_p, 0);
1429 else if (zh->reg->isamc)
1430 f_set[num_sets++] = rsisamc_create(rset_nmem, kc, kc->scope,
1431 zh->reg->isamc, isam_p, 0);
1435 *rset = rset_create_or(rset_nmem, kc, kc->scope,
1436 0 /* termid */, num_sets, f_set);
/* Phrase search: the terms of termz_org must occur in order at adjacent
 * positions.
 * term_list_trunc() yields one result set per term.  If there is at least
 * one, rpn_search_APT_position() is consulted; a position set obtained from
 * it is placed in front of the term sets (new array from `stream`).  The
 * final *rset is a null set (no terms), the single set (one term), or an
 * ordered proximity set with relation 3 and distance 1.
 * NOTE(review): the condition under which first_set is prepended and the
 * update of num_result_sets after prepending are on lines not visible
 * here. */
1442 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1443 Z_AttributesPlusTerm *zapt,
1444 const char *termz_org,
1445 oid_value attributeSet,
1447 int reg_type, int complete_flag,
1448 const char *rank_type,
1449 const char *xpath_use,
1450 int num_bases, char **basenames,
1453 struct rset_key_control *kc)
1455 RSET *result_sets = 0;
1456 int num_result_sets = 0;
1458 term_list_trunc(zh, zapt, termz_org, attributeSet,
1459 stream, reg_type, complete_flag,
1460 rank_type, xpath_use,
1461 num_bases, basenames,
1463 &result_sets, &num_result_sets, kc);
1465 if (res != ZEBRA_OK)
1468 if (num_result_sets > 0)
1471 res = rpn_search_APT_position(zh, zapt, attributeSet,
1473 num_bases, basenames,
1474 rset_nmem, &first_set,
1476 if (res != ZEBRA_OK)
/* prepend the position set to the list of term sets */
1480 RSET *nsets = nmem_malloc(stream,
1481 sizeof(RSET) * (num_result_sets+1));
1482 nsets[0] = first_set;
1483 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1484 result_sets = nsets;
1488 if (num_result_sets == 0)
1489 *rset = rset_create_null(rset_nmem, kc, 0);
1490 else if (num_result_sets == 1)
1491 *rset = result_sets[0];
1493 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1494 num_result_sets, result_sets,
1495 1 /* ordered */, 0 /* exclusion */,
1496 3 /* relation */, 1 /* distance */);
/* OR-list search: a record matches if any term of termz_org matches.
 * term_list_trunc() yields one result set per term.  For each of them
 * rpn_search_APT_position() is called; a position set obtained from it is
 * combined with the term set through an ordered proximity set (relation 3,
 * distance 1), which replaces result_sets[i].  If the position lookup fails,
 * all term sets are deleted.  The final *rset is a null set, the single set,
 * or rset_create_or() over all sets.
 * NOTE(review): the condition guarding the proximity combination and the
 * return statements are on lines not visible here. */
1502 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1503 Z_AttributesPlusTerm *zapt,
1504 const char *termz_org,
1505 oid_value attributeSet,
1507 int reg_type, int complete_flag,
1508 const char *rank_type,
1509 const char *xpath_use,
1510 int num_bases, char **basenames,
1513 struct rset_key_control *kc)
1515 RSET *result_sets = 0;
1516 int num_result_sets = 0;
1519 term_list_trunc(zh, zapt, termz_org, attributeSet,
1520 stream, reg_type, complete_flag,
1521 rank_type, xpath_use,
1522 num_bases, basenames,
1524 &result_sets, &num_result_sets, kc);
1525 if (res != ZEBRA_OK)
1528 for (i = 0; i<num_result_sets; i++)
1531 res = rpn_search_APT_position(zh, zapt, attributeSet,
1533 num_bases, basenames,
1534 rset_nmem, &first_set,
1536 if (res != ZEBRA_OK)
1538 for (i = 0; i<num_result_sets; i++)
1539 rset_delete(result_sets[i]);
1547 tmp_set[0] = first_set;
1548 tmp_set[1] = result_sets[i];
1550 result_sets[i] = rset_create_prox(
1551 rset_nmem, kc, kc->scope,
1553 1 /* ordered */, 0 /* exclusion */,
1554 3 /* relation */, 1 /* distance */);
1557 if (num_result_sets == 0)
1558 *rset = rset_create_null(rset_nmem, kc, 0);
1559 else if (num_result_sets == 1)
1560 *rset = result_sets[0];
1562 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1563 num_result_sets, result_sets);
/* AND-list search: a record matches only if every term of termz_org
 * matches.
 * The visible structure is the same as rpn_search_APT_or_list: per-term sets
 * from term_list_trunc(), a per-term call to rpn_search_APT_position() with
 * an ordered proximity combination (relation 3, distance 1), and deletion of
 * all term sets if the position lookup fails.  The final combination uses
 * rset_create_and() instead of rset_create_or().
 * NOTE(review): the condition guarding the proximity combination and the
 * return statements are on lines not visible here. */
1569 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1570 Z_AttributesPlusTerm *zapt,
1571 const char *termz_org,
1572 oid_value attributeSet,
1574 int reg_type, int complete_flag,
1575 const char *rank_type,
1576 const char *xpath_use,
1577 int num_bases, char **basenames,
1580 struct rset_key_control *kc)
1582 RSET *result_sets = 0;
1583 int num_result_sets = 0;
1586 term_list_trunc(zh, zapt, termz_org, attributeSet,
1587 stream, reg_type, complete_flag,
1588 rank_type, xpath_use,
1589 num_bases, basenames,
1591 &result_sets, &num_result_sets,
1593 if (res != ZEBRA_OK)
1595 for (i = 0; i<num_result_sets; i++)
1598 res = rpn_search_APT_position(zh, zapt, attributeSet,
1600 num_bases, basenames,
1601 rset_nmem, &first_set,
1603 if (res != ZEBRA_OK)
1605 for (i = 0; i<num_result_sets; i++)
1606 rset_delete(result_sets[i]);
1614 tmp_set[0] = first_set;
1615 tmp_set[1] = result_sets[i];
1617 result_sets[i] = rset_create_prox(
1618 rset_nmem, kc, kc->scope,
1620 1 /* ordered */, 0 /* exclusion */,
1621 3 /* relation */, 1 /* distance */);
1626 if (num_result_sets == 0)
1627 *rset = rset_create_null(rset_nmem, kc, 0);
1628 else if (num_result_sets == 1)
1629 *rset = result_sets[0];
1631 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1632 num_result_sets, result_sets);
/* Numeric counterpart of string_relation(): appends to term_dict a regular
 * expression for the relation attribute (attribute type 2) applied to the
 * integer value of the next term, then runs dict_lookup_grep() on it.
 * The term is extracted with term_100() (space_split = 1) directly into the
 * tail of term_dict, converted with atoi(), and the tail is then overwritten
 * with the generated pattern:
 *   <   gen_regular_rel(value-1, islt=1)
 *   <=  gen_regular_rel(value,   islt=1)
 *   >=  gen_regular_rel(value,   islt=0)
 *   >   gen_regular_rel(value+1, islt=0)
 *   =   "(0*%d)" (leading zeros allowed)
 * One further branch leaves term_tmp untouched and skips to the end of the
 * term; an unsupported relation stores YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE in
 * *error_code.
 * NOTE(review): atoi() gives no error indication for non-numeric or
 * out-of-range terms, and value-1 / value+1 are not guarded against int
 * overflow in the visible code.  Case labels, several parameters and the
 * return values are on lines not visible here. */
1638 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1639 const char **term_sub,
1641 oid_value attributeSet,
1642 struct grep_info *grep_info,
1652 char *term_tmp = term_dict + strlen(term_dict);
1655 attr_init_APT(&relation, zapt, 2);
1656 relation_value = attr_find(&relation, NULL);
1658 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1660 switch (relation_value)
1663 yaz_log(log_level_rpn, "Relation <");
1664 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1667 term_value = atoi (term_tmp);
1668 gen_regular_rel(term_tmp, term_value-1, 1);
1671 yaz_log(log_level_rpn, "Relation <=");
1672 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1675 term_value = atoi (term_tmp);
1676 gen_regular_rel(term_tmp, term_value, 1);
1679 yaz_log(log_level_rpn, "Relation >=");
1680 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1683 term_value = atoi (term_tmp);
1684 gen_regular_rel(term_tmp, term_value, 0);
1687 yaz_log(log_level_rpn, "Relation >");
1688 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1691 term_value = atoi (term_tmp);
1692 gen_regular_rel(term_tmp, term_value+1, 0);
1696 yaz_log(log_level_rpn, "Relation =");
1697 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1700 term_value = atoi (term_tmp);
1701 sprintf(term_tmp, "(0*%d)", term_value);
1704 /* term_tmp untouched.. */
1705 while (**term_sub != '\0')
1709 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1712 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1713 r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1717 zebra_set_partial_result(zh);
/* NOTE(review): the message says "rel = gt" although this point is reached
   for every relation */
1719 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1720 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
/* Numeric counterpart of string_term(): for every database in basenames,
 * select it, resolve the index ordinal (appended to *ol), start term_dict
 * with "(" + escape-prefixed ordinal bytes + ")" and let numeric_relation()
 * append the numeric pattern and run the dictionary grep.  A failing
 * relation reports relation_error through zebra_setError().
 * NOTE(review): term_dict is 2*IT_MAX_WORD+2 bytes here (string_term uses
 * 2*IT_MAX_WORD+4000) and must hold the prefix plus the pattern written by
 * gen_regular_rel(); no bound check is visible -- confirm it cannot
 * overflow.  The term_dst parameter, the termp initialisation and the
 * return statements are on lines not visible here. */
1724 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1725 const char **term_sub,
1726 oid_value attributeSet, NMEM stream,
1727 struct grep_info *grep_info,
1728 int reg_type, int complete_flag,
1729 int num_bases, char **basenames,
1731 const char *xpath_use,
1732 struct ord_list **ol)
1734 char term_dict[2*IT_MAX_WORD+2];
1737 struct rpn_char_map_info rcmi;
1739 int bases_ok = 0; /* no of databases with OK attribute */
1741 *ol = ord_list_create(stream);
1743 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1745 for (base_no = 0; base_no < num_bases; base_no++)
1747 int max_pos, prefix_len = 0;
1748 int relation_error = 0;
1749 int ord, ord_len, i;
1754 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1756 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1757 basenames[base_no]);
1761 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1762 attributeSet, &ord) != ZEBRA_OK)
1766 *ol = ord_list_append(stream, *ol, ord);
1768 ord_len = key_SU_encode (ord, ord_buf);
/* pattern prefix: "(" <ordinal bytes, each preceded by escape byte 1> ")" */
1770 term_dict[prefix_len++] = '(';
1771 for (i = 0; i < ord_len; i++)
1773 term_dict[prefix_len++] = 1;
1774 term_dict[prefix_len++] = ord_buf[i];
1776 term_dict[prefix_len++] = ')';
1777 term_dict[prefix_len] = '\0';
1779 if (!numeric_relation(zh, zapt, &termp, term_dict,
1780 attributeSet, grep_info, &max_pos, reg_type,
1781 term_dst, &relation_error))
1785 zebra_setError(zh, relation_error, 0);
1795 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Numeric search leaf: every term of termz is expanded with numeric_term()
 * and turned into a result set with rset_trunc() (positions not preserved,
 * argument 0).  The per-term sets are collected in an array allocated from
 * `stream`, enlarged by `add` entries when full.  After the loop grep_info
 * is released and the sets are combined: a null set for none, the single
 * set for one, otherwise rset_create_and() over all of them.
 * The loop stops when numeric_term() returns ZEBRA_FAIL or leaves termp
 * NULL.
 * NOTE(review): the termz/stream/rset_nmem/rset parameters, the assignment
 * of the enlarged array back to result_sets, and the cleanup of sets already
 * created when res != ZEBRA_OK are on lines not visible here -- confirm. */
1800 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1801 Z_AttributesPlusTerm *zapt,
1803 oid_value attributeSet,
1805 int reg_type, int complete_flag,
1806 const char *rank_type,
1807 const char *xpath_use,
1808 int num_bases, char **basenames,
1811 struct rset_key_control *kc)
1813 char term_dst[IT_MAX_WORD+1];
1814 const char *termp = termz;
1815 RSET *result_sets = 0;
1816 int num_result_sets = 0;
1818 struct grep_info grep_info;
1820 zint hits_limit_value;
1821 const char *term_ref_id_str = 0;
1823 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1825 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1826 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1830 struct ord_list *ol;
/* grow the output array when it is full */
1831 if (alloc_sets == num_result_sets)
1834 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1837 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1838 alloc_sets = alloc_sets + add;
1841 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1842 grep_info.isam_p_indx = 0;
1843 res = numeric_term(zh, zapt, &termp, attributeSet, stream, &grep_info,
1844 reg_type, complete_flag, num_bases, basenames,
1845 term_dst, xpath_use, &ol);
1846 if (res == ZEBRA_FAIL || termp == 0)
1848 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1849 result_sets[num_result_sets] =
1850 rset_trunc(zh, grep_info.isam_p_buf,
1851 grep_info.isam_p_indx, term_dst,
1852 strlen(term_dst), rank_type,
1853 0 /* preserve position */,
1854 zapt->term->which, rset_nmem,
1855 kc, kc->scope, ol, reg_type,
1858 if (!result_sets[num_result_sets])
1864 grep_info_delete(&grep_info);
1866 if (res != ZEBRA_OK)
1868 if (num_result_sets == 0)
1869 *rset = rset_create_null(rset_nmem, kc, 0);
1870 else if (num_result_sets == 1)
1871 *rset = result_sets[0];
1873 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1874 num_result_sets, result_sets);
/*
 * rpn_search_APT_local: handle an APT whose search type is "local"
 * (see the dispatch in rpn_search_APT).
 *
 * The visible code creates a temporary result set in *rset (the temporary
 * directory comes from the "setTmpDir" resource), opens it for writing and
 * writes a key into it.
 *
 * NOTE(review): the lines that declare and fill `key`, close `rsfd` and
 * return are not visible in this excerpt.
 */
1880 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1881 Z_AttributesPlusTerm *zapt,
1883 oid_value attributeSet,
1885 const char *rank_type, NMEM rset_nmem,
1887 struct rset_key_control *kc)
1892 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1893 res_get (zh->res, "setTmpDir"),0 );
1894 rsfd = rset_open(*rset, RSETF_WRITE);
1902 rset_write (rsfd, &key);
/*
 * rpn_sort_spec: record a sort criterion taken from an APT.
 *
 * The term of the APT is parsed as an integer index `i`; a freshly built
 * Z_SortKeySpec (allocated from `stream`) is stored in
 * sort_sequence->specs[i], and *rset is set to a null result set.
 *
 * NOTE(review): this excerpt is missing lines (braces, error branches,
 * some declarations and the return statements); the handling attached to
 * the bare `if` tests below is therefore not visible.
 */
1907 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1908 oid_value attributeSet, NMEM stream,
1909 Z_SortKeySpecList *sort_sequence,
1910 const char *rank_type,
1913 struct rset_key_control *kc)
1916 int sort_relation_value;
1917 AttrType sort_relation_type;
/* Attribute type 7 on the APT supplies the value used below to pick the
   sort direction. */
1924 attr_init_APT(&sort_relation_type, zapt, 7);
1925 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* First use: allocate a table of 10 spec slots and clear every slot. */
1927 if (!sort_sequence->specs)
1929 sort_sequence->num_specs = 10;
1930 sort_sequence->specs = (Z_SortKeySpec **)
1931 nmem_malloc(stream, sort_sequence->num_specs *
1932 sizeof(*sort_sequence->specs));
1933 for (i = 0; i<sort_sequence->num_specs; i++)
1934 sort_sequence->specs[i] = 0;
1936 if (zapt->term->which != Z_Term_general)
/* The general term's bytes are parsed as the slot index. */
1939 i = atoi_n ((char *) zapt->term->u.general->buf,
1940 zapt->term->u.general->len);
/* Index does not fit in the table (handling not visible here). */
1941 if (i >= sort_sequence->num_specs)
1943 sprintf(termz, "%d", i);
/* Describe the attribute set as a Z39.50 attribute-set OID entry and
   convert it to an OID. */
1945 oe.proto = PROTO_Z3950;
1946 oe.oclass = CLASS_ATTSET;
1947 oe.value = attributeSet;
1948 if (!oid_ent_to_oid (&oe, oid))
/* Build the sort key spec: a generic sort element whose key is the
   attribute list of the APT under the OID computed above. */
1951 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1952 sks->sortElement = (Z_SortElement *)
1953 nmem_malloc(stream, sizeof(*sks->sortElement));
1954 sks->sortElement->which = Z_SortElement_generic;
1955 sk = sks->sortElement->u.generic = (Z_SortKey *)
1956 nmem_malloc(stream, sizeof(*sk));
1957 sk->which = Z_SortKey_sortAttributes;
1958 sk->u.sortAttributes = (Z_SortAttributes *)
1959 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1961 sk->u.sortAttributes->id = oid;
1962 sk->u.sortAttributes->list = zapt->attributes;
/* Relation value 1 -> ascending, 2 -> descending; the last assignment is
   presumably the fallback branch (its `else` line is not visible). */
1964 sks->sortRelation = (int *)
1965 nmem_malloc(stream, sizeof(*sks->sortRelation));
1966 if (sort_relation_value == 1)
1967 *sks->sortRelation = Z_SortKeySpec_ascending;
1968 else if (sort_relation_value == 2)
1969 *sks->sortRelation = Z_SortKeySpec_descending;
1971 *sks->sortRelation = Z_SortKeySpec_ascending;
1973 sks->caseSensitivity = (int *)
1974 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1975 *sks->caseSensitivity = 0;
1977 sks->which = Z_SortKeySpec_null;
1978 sks->u.null = odr_nullval ();
/* Publish the spec in slot i; the search itself yields a null set. */
1979 sort_sequence->specs[i] = sks;
1980 *rset = rset_create_null(rset_nmem, kc, 0);
/*
 * rpn_check_xpath: decide whether the APT carries an X-Path expression.
 *
 * The string value of attribute type 1 on the APT is fetched; when it
 * exists and begins with '/', it is handed to zebra_parse_xpath_str()
 * together with the caller's `xpath` array and `max`, and that function's
 * result is returned.
 *
 * NOTE(review): the value returned when the string is absent or does not
 * begin with '/' is on a line that is not visible in this excerpt.
 */
1985 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1986 oid_value attributeSet,
1987 struct xpath_location_step *xpath, int max,
1990 oid_value curAttributeSet = attributeSet;
1992 const char *use_string = 0;
1994 attr_init_APT(&use, zapt, 1);
1995 attr_find_ex(&use, &curAttributeSet, &use_string);
/* No string value, or it does not look like a path. */
1997 if (!use_string || *use_string != '/')
2000 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/*
 * xpath_trunc: look up `term` in the dictionary under the index ordinal
 * obtained for `xpath_use` and return the matches as an RSET.
 *
 * The dictionary pattern is assembled in term_dict as
 *   '(' <encoded ordinal bytes, each preceded by byte 1> ')' <term>
 * and passed to dict_lookup_grep(); the collected ISAM positions are
 * then turned into a result set with rset_trunc().
 *
 * A null result set is returned when grep_info_prepare() fails, and under
 * one further condition whose test is not visible in this excerpt.
 *
 * NOTE(review): this excerpt is missing lines (some declarations, the
 * remaining arguments of zebraExplain_lookup_attr_str, braces and the
 * final return).
 */
2005 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2006 int reg_type, const char *term,
2007 const char *xpath_use,
2009 struct rset_key_control *kc)
2012 struct grep_info grep_info;
2013 char term_dict[2048];
2016 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2017 zinfo_index_category_index,
2020 int ord_len, i, r, max_pos;
2021 int term_type = Z_Term_characterString;
/* Passed to rset_trunc() in the argument position where
   rpn_search_APT_numeric passes its rank_type. */
2022 const char *flags = "void";
2024 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2025 return rset_create_null(rset_nmem, kc, 0);
2028 return rset_create_null(rset_nmem, kc, 0);
2030 term_dict[prefix_len++] = '|';
2032 term_dict[prefix_len++] = '(';
/* Append the encoded ordinal, writing byte value 1 before each byte. */
2034 ord_len = key_SU_encode (ord, ord_buf);
2035 for (i = 0; i<ord_len; i++)
2037 term_dict[prefix_len++] = 1;
2038 term_dict[prefix_len++] = ord_buf[i];
2040 term_dict[prefix_len++] = ')';
/* NOTE(review): no length check on `term` is visible before this copy
   into the fixed 2048-byte term_dict -- confirm callers bound it. */
2041 strcpy(term_dict+prefix_len, term);
2043 grep_info.isam_p_indx = 0;
2044 r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2045 &grep_info, &max_pos, 0, grep_handle);
2046 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2047 grep_info.isam_p_indx);
2048 rset = rset_trunc(zh, grep_info.isam_p_buf,
2049 grep_info.isam_p_indx, term, strlen(term),
2050 flags, 1, term_type,rset_nmem,
2051 kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
2052 0 /* term_ref_id_str */);
2053 grep_info_delete(&grep_info);
/*
 * rpn_search_xpath: restrict a result set to an X-Path location.
 *
 * For every database in `basenames` and for every X-Path level (from the
 * last step towards the first) the visible code:
 *  - builds `xpath_rev`, a pattern made from the step names
 *    xpath[level] .. xpath[1] in that (reversed) order, each followed
 *    by "/";
 *  - if the step has a relation predicate with a name, builds an
 *    attribute pattern and looks it up with xpath_trunc()
 *    (ZEBRA_XPATH_ATTR_NAME) to get rset_attr;
 *  - looks up the begin-tag and end-tag sets with xpath_trunc() and wraps
 *    the current `rset` with rset_create_between().
 *
 * `rset` may be 0 on entry; always_matches records that case.
 *
 * NOTE(review): this excerpt is missing lines (declarations, braces,
 * several conditions, the opening of the example comment and the return
 * statements), so branch boundaries should be confirmed in the full file.
 */
2058 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2059 int num_bases, char **basenames,
2060 NMEM stream, const char *rank_type, RSET rset,
2061 int xpath_len, struct xpath_location_step *xpath,
2064 struct rset_key_control *kc)
2068 int always_matches = rset ? 0 : 1;
2076 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2077 for (i = 0; i<xpath_len; i++)
2079 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2091 a[@attr = value]/b[@other = othervalue]
2093 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2094 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2095 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2096 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2097 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2098 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
/* Remove any character-map handler from the dictionary grep (compare
   rpn_char_map_prepare, which installs one). */
2102 dict_grep_cmap (zh->reg->dict, 0, 0);
2104 for (base_no = 0; base_no < num_bases; base_no++)
2106 int level = xpath_len;
/* A non-zero result from zebraExplain_curDatabase() is reported as
   "database unavailable" for this base name. */
2109 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2111 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2112 basenames[base_no]);
/* Walk the steps from the last one down to index 0. */
2116 while (--level >= 0)
2118 WRBUF xpath_rev = wrbuf_alloc();
2120 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* Append step names from `level` down to 1, i.e. innermost first. */
2122 for (i = level; i >= 1; --i)
2124 const char *cp = xpath[i].part;
2130 wrbuf_puts(xpath_rev, "[^/]*");
2131 else if (*cp == ' ')
2132 wrbuf_puts(xpath_rev, "\001 ");
2134 wrbuf_putc(xpath_rev, *cp);
2136 /* wrbuf_putc does not null-terminate , but
2137 wrbuf_puts below ensures it does.. so xpath_rev
2138 is OK iff length is > 0 */
2140 wrbuf_puts(xpath_rev, "/");
2142 else if (i == 1) /* // case */
2143 wrbuf_puts(xpath_rev, ".*");
/* Relation predicate with a non-empty name: build "<name+1>[=<value>]"
   (the first character of the name is skipped) and look it up as an
   attribute name. */
2145 if (xpath[level].predicate &&
2146 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2147 xpath[level].predicate->u.relation.name[0])
2149 WRBUF wbuf = wrbuf_alloc();
2150 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2151 if (xpath[level].predicate->u.relation.value)
2153 const char *cp = xpath[level].predicate->u.relation.value;
2154 wrbuf_putc(wbuf, '=');
/* Characters listed in REGEX_CHARS get a backslash in front. */
2158 if (strchr(REGEX_CHARS, *cp))
2159 wrbuf_putc(wbuf, '\\');
2160 wrbuf_putc(wbuf, *cp);
/* Empty puts: per the note above, wrbuf_puts leaves the buffer
   null-terminated before wrbuf_buf() is used as a C string. */
2164 wrbuf_puts(wbuf, "");
2165 rset_attr = xpath_trunc(
2166 zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME,
2168 wrbuf_free(wbuf, 1);
2174 wrbuf_free(xpath_rev, 1);
2178 yaz_log(log_level_rpn, "xpath_rev (%d) = %.*s", level,
2179 wrbuf_len(xpath_rev), wrbuf_buf(xpath_rev));
/* Only a non-empty pattern is looked up. */
2180 if (wrbuf_len(xpath_rev))
2182 rset_start_tag = xpath_trunc(zh, stream, '0',
2183 wrbuf_buf(xpath_rev),
2184 ZEBRA_XPATH_ELM_BEGIN,
2187 rset = rset_start_tag;
2190 rset_end_tag = xpath_trunc(zh, stream, '0',
2191 wrbuf_buf(xpath_rev),
2192 ZEBRA_XPATH_ELM_END,
/* Keep only hits of `rset` lying between begin and end tags, with the
   attribute set passed as the last argument. */
2195 rset = rset_create_between(rset_nmem, kc, kc->scope,
2196 rset_start_tag, rset,
2197 rset_end_tag, rset_attr);
2200 wrbuf_free(xpath_rev, 1);
/* Capacity of the xpath[] step array in rpn_search_APT; also passed as
   the `max` argument to rpn_check_xpath. */
2208 #define MAX_XPATH_STEPS 10
/*
 * rpn_search_APT: evaluate a single attributes-plus-term operand.
 *
 * Steps visible in this function:
 *  1. zebra_maps_attr() derives reg_id, search_type, rank_type,
 *     complete_flag and sort_flag from the APT.
 *  2. The term is converted to UTF-8 into termz.
 *  3. A sort request is delegated to rpn_sort_spec().
 *  4. rpn_check_xpath() decides whether an X-Path use attribute is
 *     present and, if so, which X-Path index the term is searched in.
 *  5. The term is searched with the strategy named by search_type
 *     ("phrase", "and-list", "or-list", "local", "numeric"); any other
 *     value is reported as YAZ_BIB1_UNSUPP_SEARCH.
 *  6. The result is passed through rpn_search_xpath().
 *
 * NOTE(review): this excerpt is missing lines (declarations, braces,
 * several conditions and return statements), so the guards around the
 * fragments below should be confirmed against the full file.
 */
2210 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2211 oid_value attributeSet, NMEM stream,
2212 Z_SortKeySpecList *sort_sequence,
2213 int num_bases, char **basenames,
2216 struct rset_key_control *kc)
2218 ZEBRA_RES res = ZEBRA_OK;
2220 char *search_type = NULL;
2221 char rank_type[128];
2224 char termz[IT_MAX_WORD+1];
2226 const char *xpath_use = 0;
2227 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2231 log_level_rpn = yaz_log_module_level("rpn");
/* reg_id is an output argument here; it is logged and handed to the
   search strategies below. */
2234 zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2235 rank_type, &complete_flag, &sort_flag);
2237 yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2238 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2239 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2240 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2242 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
/* Sort request (the guarding condition is not visible here; presumably
   it tests sort_flag). */
2246 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2247 rank_type, rset_nmem, rset, kc);
2248 /* consider if an X-Path query is used */
2249 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2250 xpath, MAX_XPATH_STEPS, stream);
/* A last step starting with '@' addresses an attribute; otherwise the
   term is searched in element character data. */
2253 if (xpath[xpath_len-1].part[0] == '@')
2254 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2256 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
/* Relation attribute (type 2): value 103 skips the term search and goes
   straight to the X-Path restriction with no term set. */
2263 attr_init_APT(&relation, zapt, 2);
2264 relation_value = attr_find(&relation, NULL);
2266 if (relation_value == 103) /* alwaysmatches */
2268 *rset = 0; /* signal no "term" set */
2269 return rpn_search_xpath(zh, num_bases, basenames,
2270 stream, rank_type, *rset,
2271 xpath_len, xpath, rset_nmem, rset, kc);
2276 /* search using one of the various search type strategies
2277 termz is our UTF-8 search term
2278 attributeSet is top-level default attribute set
2279 stream is ODR for search
2280 reg_id is the register type
2281 complete_flag is 1 for complete subfield, 0 for incomplete
2282 xpath_use is use-attribute to be used for X-Path search, 0 for none
2284 if (!strcmp(search_type, "phrase"))
2286 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2287 reg_id, complete_flag, rank_type,
2289 num_bases, basenames, rset_nmem,
2292 else if (!strcmp(search_type, "and-list"))
2294 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2295 reg_id, complete_flag, rank_type,
2297 num_bases, basenames, rset_nmem,
2300 else if (!strcmp(search_type, "or-list"))
2302 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2303 reg_id, complete_flag, rank_type,
2305 num_bases, basenames, rset_nmem,
2308 else if (!strcmp(search_type, "local"))
2310 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2311 rank_type, rset_nmem, rset, kc);
2313 else if (!strcmp(search_type, "numeric"))
2315 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2316 reg_id, complete_flag, rank_type,
2318 num_bases, basenames, rset_nmem,
/* Unknown search_type. */
2323 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2326 if (res != ZEBRA_OK)
/* Apply the X-Path restriction to the term's result set. */
2330 return rpn_search_xpath(zh, num_bases, basenames,
2331 stream, rank_type, *rset,
2332 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration: rpn_search_structure is defined further down in
   this file and is called by rpn_search_top before that definition. */
2335 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2336 oid_value attributeSet,
2337 NMEM stream, NMEM rset_nmem,
2338 Z_SortKeySpecList *sort_sequence,
2339 int num_bases, char **basenames,
2340 RSET **result_sets, int *num_result_sets,
2341 Z_Operator *parent_op,
2342 struct rset_key_control *kc);
/*
 * rpn_get_top_approx_limit: walk an RPN query tree looking at attribute
 * type 12 of its APT operands.
 *
 * Complex nodes recurse into s1 and then, if that succeeded, into s2.
 * For a simple node holding an APT, attribute type 12 is looked up with
 * attr_find().
 *
 * NOTE(review): the remaining parameters, the use made of the value `l`
 * and the return statement are on lines not visible in this excerpt.
 */
2344 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2347 ZEBRA_RES res = ZEBRA_OK;
2348 if (zs->which == Z_RPNStructure_complex)
2350 if (res == ZEBRA_OK)
2351 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2353 if (res == ZEBRA_OK)
2354 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2357 else if (zs->which == Z_RPNStructure_simple)
2359 if (zs->u.simple->which == Z_Operand_APT)
2361 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2362 AttrType global_hits_limit_attr;
2365 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2367 l = attr_find(&global_hits_limit_attr, NULL);
/*
 * rpn_search_top: evaluate a whole RPN query tree.
 *
 * Creates a key control with zebra_key_control_create() and evaluates
 * `zs` through rpn_search_structure() with no parent operator.  On
 * failure every result set returned so far is deleted.  On success
 * exactly one result set is expected (asserted) and is stored in
 * *result_set.
 *
 * NOTE(review): the remaining parameters, braces, any release of `kc`
 * and the return statements are on lines not visible in this excerpt.
 */
2375 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2376 oid_value attributeSet,
2377 NMEM stream, NMEM rset_nmem,
2378 Z_SortKeySpecList *sort_sequence,
2379 int num_bases, char **basenames,
2382 RSET *result_sets = 0;
2383 int num_result_sets = 0;
2385 struct rset_key_control *kc = zebra_key_control_create(zh);
2387 res = rpn_search_structure(zh, zs, attributeSet,
2390 num_bases, basenames,
2391 &result_sets, &num_result_sets,
2392 0 /* no parent op */,
2394 if (res != ZEBRA_OK)
/* Failure: dispose of whatever sets were handed back. */
2397 for (i = 0; i<num_result_sets; i++)
2398 rset_delete(result_sets[i]);
/* Success: the top of the tree must have collapsed to a single set. */
2403 assert(num_result_sets == 1);
2404 assert(result_sets);
2405 assert(*result_sets);
2406 *result_set = *result_sets;
/*
 * rpn_search_structure: recursively evaluate one node of an RPN query.
 *
 * Results are handed back as an array (*result_sets, *num_result_sets)
 * allocated from `stream`.
 *
 * Complex node: both operands are evaluated; on failure the sets obtained
 * so far are deleted.  The two child arrays are concatenated.  If there
 * is no parent operator, the parent operator differs from this one, or
 * this operator is neither AND nor OR, the sets are combined right away
 * into a single set (and / or / and-not / proximity).  Otherwise the
 * concatenated array stays in *result_sets as assigned.
 *
 * Simple node: an APT is evaluated with rpn_search_APT(); a result set id
 * is resolved with resultSetRef(); any other operand kind is reported as
 * YAZ_BIB1_UNSUPP_SEARCH.  The single resulting set is returned in a
 * one-element array.
 *
 * NOTE(review): this excerpt is missing lines (declarations, braces,
 * the switch header, some arguments and the return statements), so the
 * exact branch boundaries should be confirmed against the full file.
 */
2412 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2413 oid_value attributeSet,
2414 NMEM stream, NMEM rset_nmem,
2415 Z_SortKeySpecList *sort_sequence,
2416 int num_bases, char **basenames,
2417 RSET **result_sets, int *num_result_sets,
2418 Z_Operator *parent_op,
2419 struct rset_key_control *kc)
2421 *num_result_sets = 0;
2422 if (zs->which == Z_RPNStructure_complex)
2425 Z_Operator *zop = zs->u.complex->roperator;
2426 RSET *result_sets_l = 0;
2427 int num_result_sets_l = 0;
2428 RSET *result_sets_r = 0;
2429 int num_result_sets_r = 0;
/* Left operand. */
2431 res = rpn_search_structure(zh, zs->u.complex->s1,
2432 attributeSet, stream, rset_nmem,
2434 num_bases, basenames,
2435 &result_sets_l, &num_result_sets_l,
2437 if (res != ZEBRA_OK)
2440 for (i = 0; i<num_result_sets_l; i++)
2441 rset_delete(result_sets_l[i]);
/* Right operand; on failure the sets of both sides are deleted. */
2444 res = rpn_search_structure(zh, zs->u.complex->s2,
2445 attributeSet, stream, rset_nmem,
2447 num_bases, basenames,
2448 &result_sets_r, &num_result_sets_r,
2450 if (res != ZEBRA_OK)
2453 for (i = 0; i<num_result_sets_l; i++)
2454 rset_delete(result_sets_l[i]);
2455 for (i = 0; i<num_result_sets_r; i++)
2456 rset_delete(result_sets_r[i]);
2460 /* make a new list of result for all children */
2461 *num_result_sets = num_result_sets_l + num_result_sets_r;
2462 *result_sets = nmem_malloc(stream, *num_result_sets *
2463 sizeof(**result_sets));
2464 memcpy(*result_sets, result_sets_l,
2465 num_result_sets_l * sizeof(**result_sets));
2466 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2467 num_result_sets_r * sizeof(**result_sets));
2469 if (!parent_op || parent_op->which != zop->which
2470 || (zop->which != Z_Operator_and &&
2471 zop->which != Z_Operator_or))
2473 /* parent node different from this one (or non-present) */
2474 /* we must combine result sets now */
2478 case Z_Operator_and:
2479 rset = rset_create_and(rset_nmem, kc,
2481 *num_result_sets, *result_sets);
2484 rset = rset_create_or(rset_nmem, kc,
2485 kc->scope, 0, /* termid */
2486 *num_result_sets, *result_sets);
2488 case Z_Operator_and_not:
2489 rset = rset_create_not(rset_nmem, kc,
/* Proximity: anything other than a known unit equal to "word" is
   reported as an unsupported proximity unit code. */
2494 case Z_Operator_prox:
2495 if (zop->u.prox->which != Z_ProximityOperator_known)
2498 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2502 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2504 zebra_setError_zint(zh,
2505 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2506 *zop->u.prox->u.known);
/* A missing exclusion flag is passed on as 0. */
2511 rset = rset_create_prox(rset_nmem, kc,
2513 *num_result_sets, *result_sets,
2514 *zop->u.prox->ordered,
2515 (!zop->u.prox->exclusion ?
2516 0 : *zop->u.prox->exclusion),
2517 *zop->u.prox->relationType,
2518 *zop->u.prox->distance );
2522 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* Replace the child list by a one-element list holding the combined set. */
2525 *num_result_sets = 1;
2526 *result_sets = nmem_malloc(stream, *num_result_sets *
2527 sizeof(**result_sets));
2528 (*result_sets)[0] = rset;
2531 else if (zs->which == Z_RPNStructure_simple)
2536 if (zs->u.simple->which == Z_Operand_APT)
2538 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2539 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2540 attributeSet, stream, sort_sequence,
2541 num_bases, basenames, rset_nmem, &rset,
2543 if (res != ZEBRA_OK)
2546 else if (zs->u.simple->which == Z_Operand_resultSetId)
2548 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2549 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2553 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2554 zs->u.simple->u.resultSetId);
2561 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* A simple operand always yields exactly one set. */
2564 *num_result_sets = 1;
2565 *result_sets = nmem_malloc(stream, *num_result_sets *
2566 sizeof(**result_sets));
2567 (*result_sets)[0] = rset;
2571 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2582 * indent-tabs-mode: nil
2584 * vim: shiftwidth=4 tabstop=8 expandtab