1 /* $Id: rpnsearch.c,v 1.11 2007-04-16 08:44:32 adam Exp $
2 Copyright (C) 1995-2007
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
43 #define TERMSET_DISABLE 1
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
47 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48 const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
52 const char *outp = *out;
53 yaz_log(YLOG_LOG, "---");
56 yaz_log(YLOG_LOG, "%02X", *outp);
64 void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
65 struct rpn_char_map_info *map_info)
67 map_info->zm = reg->zebra_maps;
68 map_info->reg_type = reg_type;
69 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
87 static int add_isam_p(const char *name, const char *info,
92 log_level_rpn = yaz_log_module_level("rpn");
95 /* we may have to stop this madness.. NOTE: -1 so that if
96 truncmax == trunxlimit we do *not* generate result sets */
97 if (p->isam_p_indx >= p->trunc_max - 1)
100 if (p->isam_p_indx == p->isam_p_size)
102 ISAM_P *new_isam_p_buf;
106 p->isam_p_size = 2*p->isam_p_size + 100;
107 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
111 memcpy(new_isam_p_buf, p->isam_p_buf,
112 p->isam_p_indx * sizeof(*p->isam_p_buf));
113 xfree(p->isam_p_buf);
115 p->isam_p_buf = new_isam_p_buf;
118 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
121 memcpy(new_term_no, p->isam_p_buf,
122 p->isam_p_indx * sizeof(*p->term_no));
125 p->term_no = new_term_no;
128 assert(*info == sizeof(*p->isam_p_buf));
129 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
134 char term_tmp[IT_MAX_WORD];
136 const char *index_name;
137 int len = key_SU_decode (&ord, (const unsigned char *) name);
139 zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len);
140 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
141 zebraExplain_lookup_ord(p->zh->reg->zei,
142 ord, 0 /* index_type */, &db, &index_name);
143 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
145 resultSetAddTerm(p->zh, p->termset, name[len], db,
146 index_name, term_tmp);
152 static int grep_handle(char *name, const char *info, void *p)
154 return add_isam_p(name, info, (struct grep_info *) p);
157 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
158 const char *ct1, const char *ct2, int first)
160 const char *s1, *s0 = *src;
163 /* skip white space */
166 if (ct1 && strchr(ct1, *s0))
168 if (ct2 && strchr(ct2, *s0))
171 map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
172 if (**map != *CHR_SPACE)
181 static void esc_str(char *out_buf, size_t out_size,
182 const char *in_buf, int in_size)
188 assert(out_size > 20);
190 for (k = 0; k<in_size; k++)
192 int c = in_buf[k] & 0xff;
194 if (c < 32 || c > 126)
198 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
199 if (strlen(out_buf) > out_size-20)
201 strcat(out_buf, "..");
207 #define REGEX_CHARS " []()|.*+?!"
209 /* term_100: handle term, where trunc = none(no operators at all) */
210 static int term_100(ZebraMaps zebra_maps, int reg_type,
211 const char **src, char *dst, int space_split,
219 const char *space_start = 0;
220 const char *space_end = 0;
222 if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
229 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
233 if (**map == *CHR_SPACE)
236 else /* complete subfield only. */
238 if (**map == *CHR_SPACE)
239 { /* save space mapping for later .. */
244 else if (space_start)
245 { /* reload last space */
246 while (space_start < space_end)
248 if (strchr(REGEX_CHARS, *space_start))
250 dst_term[j++] = *space_start;
251 dst[i++] = *space_start++;
254 space_start = space_end = 0;
257 /* add non-space char */
258 memcpy(dst_term+j, s1, s0 - s1);
264 if (strchr(REGEX_CHARS, *s1))
272 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
274 strcpy(dst + i, map[0]);
284 /* term_101: handle term, where trunc = Process # */
285 static int term_101(ZebraMaps zebra_maps, int reg_type,
286 const char **src, char *dst, int space_split,
294 if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
303 dst_term[j++] = *s0++;
309 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
311 if (space_split && **map == *CHR_SPACE)
314 /* add non-space char */
315 memcpy(dst_term+j, s1, s0 - s1);
321 if (strchr(REGEX_CHARS, *s1))
329 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
331 strcpy(dst + i, map[0]);
337 dst_term[j++] = '\0';
342 /* term_103: handle term, where trunc = re-2 (regular expressions) */
343 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
344 char *dst, int *errors, int space_split,
352 if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
355 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
356 isdigit(((const unsigned char *)s0)[1]))
358 *errors = s0[1] - '0';
365 if (strchr("^\\()[].*+?|-", *s0))
374 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
376 if (space_split && **map == *CHR_SPACE)
379 /* add non-space char */
380 memcpy(dst_term+j, s1, s0 - s1);
386 if (strchr(REGEX_CHARS, *s1))
394 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
396 strcpy(dst + i, map[0]);
408 /* term_102: handle term, where trunc = re-1 (regular expressions) */
409 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
410 char *dst, int space_split, char *dst_term)
412 return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
417 /* term_104: handle term, where trunc = Process # and ! */
418 static int term_104(ZebraMaps zebra_maps, int reg_type,
419 const char **src, char *dst, int space_split,
427 if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
434 dst_term[j++] = *s0++;
435 if (*s0 >= '0' && *s0 <= '9')
438 while (*s0 >= '0' && *s0 <= '9')
440 limit = limit * 10 + (*s0 - '0');
441 dst_term[j++] = *s0++;
461 dst_term[j++] = *s0++;
466 dst_term[j++] = *s0++;
472 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
474 if (space_split && **map == *CHR_SPACE)
477 /* add non-space char */
478 memcpy(dst_term+j, s1, s0 - s1);
484 if (strchr(REGEX_CHARS, *s1))
492 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
494 strcpy(dst + i, map[0]);
500 dst_term[j++] = '\0';
505 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
506 static int term_105(ZebraMaps zebra_maps, int reg_type,
507 const char **src, char *dst, int space_split,
508 char *dst_term, int right_truncate)
515 if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
524 dst_term[j++] = *s0++;
529 dst_term[j++] = *s0++;
535 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
537 if (space_split && **map == *CHR_SPACE)
540 /* add non-space char */
541 memcpy(dst_term+j, s1, s0 - s1);
547 if (strchr(REGEX_CHARS, *s1))
555 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
557 strcpy(dst + i, map[0]);
569 dst_term[j++] = '\0';
575 /* gen_regular_rel - generate regular expression from relation
576 * val: border value (inclusive)
577 * islt: 1 if <=; 0 if >=.
579 static void gen_regular_rel(char *dst, int val, int islt)
586 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
590 strcpy(dst, "(-[0-9]+|(");
598 strcpy(dst, "([0-9]+|-(");
610 sprintf(numstr, "%d", val);
611 for (w = strlen(numstr); --w >= 0; pos++)
630 strcpy(dst + dst_p, numstr);
631 dst_p = strlen(dst) - pos - 1;
659 for (i = 0; i<pos; i++)
672 /* match everything less than 10^(pos-1) */
674 for (i = 1; i<pos; i++)
675 strcat(dst, "[0-9]?");
679 /* match everything greater than 10^pos */
680 for (i = 0; i <= pos; i++)
681 strcat(dst, "[0-9]");
682 strcat(dst, "[0-9]*");
687 void string_rel_add_char(char **term_p, const char *src, int *indx)
689 if (src[*indx] == '\\')
690 *(*term_p)++ = src[(*indx)++];
691 *(*term_p)++ = src[(*indx)++];
695 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
696 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
697 * >= abc ([b-].*|a[c-].*|ab[c-].*)
698 * ([^-a].*|a[^-b].*|ab[c-].*)
699 * < abc ([-0].*|a[-a].*|ab[-b].*)
700 * ([^a-].*|a[^b-].*|ab[^c-].*)
701 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
702 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
704 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
705 const char **term_sub, char *term_dict,
706 const int *attributeSet,
707 int reg_type, int space_split, char *term_dst,
713 char *term_tmp = term_dict + strlen(term_dict);
714 char term_component[2*IT_MAX_WORD+20];
716 attr_init_APT(&relation, zapt, 2);
717 relation_value = attr_find(&relation, NULL);
720 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
721 switch (relation_value)
724 if (!term_100(zh->reg->zebra_maps, reg_type,
725 term_sub, term_component,
726 space_split, term_dst))
728 yaz_log(log_level_rpn, "Relation <");
731 for (i = 0; term_component[i]; )
738 string_rel_add_char(&term_tmp, term_component, &j);
745 *term_tmp++ = FIRST_IN_FIELD_CHAR;
747 string_rel_add_char(&term_tmp, term_component, &i);
754 if ((term_tmp - term_dict) > IT_MAX_WORD)
759 yaz_log(YLOG_LOG, "term_dict=%s", term_dict);
762 if (!term_100(zh->reg->zebra_maps, reg_type,
763 term_sub, term_component,
764 space_split, term_dst))
766 yaz_log(log_level_rpn, "Relation <=");
769 for (i = 0; term_component[i]; )
774 string_rel_add_char(&term_tmp, term_component, &j);
780 *term_tmp++ = FIRST_IN_FIELD_CHAR;
782 string_rel_add_char(&term_tmp, term_component, &i);
791 if ((term_tmp - term_dict) > IT_MAX_WORD)
794 for (i = 0; term_component[i]; )
795 string_rel_add_char(&term_tmp, term_component, &i);
800 if (!term_100 (zh->reg->zebra_maps, reg_type,
801 term_sub, term_component, space_split, term_dst))
803 yaz_log(log_level_rpn, "Relation >");
806 for (i = 0; term_component[i];)
811 string_rel_add_char(&term_tmp, term_component, &j);
816 string_rel_add_char(&term_tmp, term_component, &i);
824 if ((term_tmp - term_dict) > IT_MAX_WORD)
827 for (i = 0; term_component[i];)
828 string_rel_add_char(&term_tmp, term_component, &i);
835 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
836 term_component, space_split, term_dst))
838 yaz_log(log_level_rpn, "Relation >=");
841 for (i = 0; term_component[i];)
848 string_rel_add_char(&term_tmp, term_component, &j);
851 if (term_component[i+1])
855 string_rel_add_char(&term_tmp, term_component, &i);
859 string_rel_add_char(&term_tmp, term_component, &i);
866 if ((term_tmp - term_dict) > IT_MAX_WORD)
877 yaz_log(log_level_rpn, "Relation =");
878 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
879 term_component, space_split, term_dst))
881 strcat(term_tmp, "(");
882 strcat(term_tmp, term_component);
883 strcat(term_tmp, ")");
886 yaz_log(log_level_rpn, "Relation always matches");
887 /* skip to end of term (we don't care what it is) */
888 while (**term_sub != '\0')
892 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
898 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
899 const char **term_sub,
900 const int *attributeSet, NMEM stream,
901 struct grep_info *grep_info,
902 int reg_type, int complete_flag,
903 int num_bases, char **basenames,
905 const char *xpath_use,
906 struct ord_list **ol);
908 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
909 Z_AttributesPlusTerm *zapt,
910 zint *hits_limit_value,
911 const char **term_ref_id_str,
914 AttrType term_ref_id_attr;
915 AttrType hits_limit_attr;
918 attr_init_APT(&hits_limit_attr, zapt, 11);
919 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
921 attr_init_APT(&term_ref_id_attr, zapt, 10);
922 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
923 if (term_ref_id_int >= 0)
925 char *res = nmem_malloc(nmem, 20);
926 sprintf(res, "%d", term_ref_id_int);
927 *term_ref_id_str = res;
930 /* no limit given ? */
931 if (*hits_limit_value == -1)
933 if (*term_ref_id_str)
935 /* use global if term_ref is present */
936 *hits_limit_value = zh->approx_limit;
940 /* no counting if term_ref is not present */
941 *hits_limit_value = 0;
944 else if (*hits_limit_value == 0)
946 /* 0 is the same as global limit */
947 *hits_limit_value = zh->approx_limit;
949 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
950 *term_ref_id_str ? *term_ref_id_str : "none",
955 static ZEBRA_RES term_trunc(ZebraHandle zh,
956 Z_AttributesPlusTerm *zapt,
957 const char **term_sub,
958 const int *attributeSet, NMEM stream,
959 struct grep_info *grep_info,
960 int reg_type, int complete_flag,
961 int num_bases, char **basenames,
963 const char *rank_type,
964 const char *xpath_use,
967 struct rset_key_control *kc)
971 zint hits_limit_value;
972 const char *term_ref_id_str = 0;
975 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
976 grep_info->isam_p_indx = 0;
977 res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
978 reg_type, complete_flag, num_bases, basenames,
979 term_dst, xpath_use, &ol);
982 if (!*term_sub) /* no more terms ? */
984 yaz_log(log_level_rpn, "term: %s", term_dst);
985 *rset = rset_trunc(zh, grep_info->isam_p_buf,
986 grep_info->isam_p_indx, term_dst,
987 strlen(term_dst), rank_type, 1 /* preserve pos */,
988 zapt->term->which, rset_nmem,
989 kc, kc->scope, ol, reg_type, hits_limit_value,
996 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
997 const char **term_sub,
998 const int *attributeSet, NMEM stream,
999 struct grep_info *grep_info,
1000 int reg_type, int complete_flag,
1001 int num_bases, char **basenames,
1003 const char *xpath_use,
1004 struct ord_list **ol)
1006 char term_dict[2*IT_MAX_WORD+4000];
1008 AttrType truncation;
1009 int truncation_value;
1011 struct rpn_char_map_info rcmi;
1012 int space_split = complete_flag ? 0 : 1;
1014 int bases_ok = 0; /* no of databases with OK attribute */
1016 *ol = ord_list_create(stream);
1018 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1019 attr_init_APT(&truncation, zapt, 5);
1020 truncation_value = attr_find(&truncation, NULL);
1021 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1023 for (base_no = 0; base_no < num_bases; base_no++)
1026 int regex_range = 0;
1027 int max_pos, prefix_len = 0;
1032 termp = *term_sub; /* start of term for each database */
1034 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1036 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1037 basenames[base_no]);
1041 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1042 attributeSet, &ord) != ZEBRA_OK)
1047 *ol = ord_list_append(stream, *ol, ord);
1048 ord_len = key_SU_encode (ord, ord_buf);
1050 term_dict[prefix_len++] = '(';
1051 for (i = 0; i<ord_len; i++)
1053 term_dict[prefix_len++] = 1; /* our internal regexp escape char */
1054 term_dict[prefix_len++] = ord_buf[i];
1056 term_dict[prefix_len++] = ')';
1057 term_dict[prefix_len] = '\0';
1059 switch (truncation_value)
1061 case -1: /* not specified */
1062 case 100: /* do not truncate */
1063 if (!string_relation(zh, zapt, &termp, term_dict,
1065 reg_type, space_split, term_dst,
1070 zebra_setError(zh, relation_error, 0);
1077 case 1: /* right truncation */
1078 term_dict[j++] = '(';
1079 if (!term_100(zh->reg->zebra_maps, reg_type,
1080 &termp, term_dict + j, space_split, term_dst))
1085 strcat(term_dict, ".*)");
1087 case 2: /* left truncation */
1088 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1089 if (!term_100(zh->reg->zebra_maps, reg_type,
1090 &termp, term_dict + j, space_split, term_dst))
1095 strcat(term_dict, ")");
1097 case 3: /* left&right truncation */
1098 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1099 if (!term_100(zh->reg->zebra_maps, reg_type,
1100 &termp, term_dict + j, space_split, term_dst))
1105 strcat(term_dict, ".*)");
1107 case 101: /* process # in term */
1108 term_dict[j++] = '(';
1109 if (!term_101(zh->reg->zebra_maps, reg_type,
1110 &termp, term_dict + j, space_split, term_dst))
1115 strcat(term_dict, ")");
1117 case 102: /* Regexp-1 */
1118 term_dict[j++] = '(';
1119 if (!term_102(zh->reg->zebra_maps, reg_type,
1120 &termp, term_dict + j, space_split, term_dst))
1125 strcat(term_dict, ")");
1127 case 103: /* Regexp-2 */
1129 term_dict[j++] = '(';
1130 if (!term_103(zh->reg->zebra_maps, reg_type,
1131 &termp, term_dict + j, &regex_range,
1132 space_split, term_dst))
1137 strcat(term_dict, ")");
1139 case 104: /* process # and ! in term */
1140 term_dict[j++] = '(';
1141 if (!term_104(zh->reg->zebra_maps, reg_type,
1142 &termp, term_dict + j, space_split, term_dst))
1147 strcat(term_dict, ")");
1149 case 105: /* process * and ! in term */
1150 term_dict[j++] = '(';
1151 if (!term_105(zh->reg->zebra_maps, reg_type,
1152 &termp, term_dict + j, space_split, term_dst, 1))
1157 strcat(term_dict, ")");
1159 case 106: /* process * and ! in term */
1160 term_dict[j++] = '(';
1161 if (!term_105(zh->reg->zebra_maps, reg_type,
1162 &termp, term_dict + j, space_split, term_dst, 0))
1167 strcat(term_dict, ")");
1170 zebra_setError_zint(zh,
1171 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1178 const char *input = term_dict + prefix_len;
1179 esc_str(buf, sizeof(buf), input, strlen(input));
1181 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1182 r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1183 grep_info, &max_pos,
1184 ord_len /* number of "exact" chars */,
1187 zebra_set_partial_result(zh);
1189 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1194 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1200 static void grep_info_delete(struct grep_info *grep_info)
1203 xfree(grep_info->term_no);
1205 xfree(grep_info->isam_p_buf);
1208 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1209 Z_AttributesPlusTerm *zapt,
1210 struct grep_info *grep_info,
1214 grep_info->term_no = 0;
1216 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1217 grep_info->isam_p_size = 0;
1218 grep_info->isam_p_buf = NULL;
1220 grep_info->reg_type = reg_type;
1221 grep_info->termset = 0;
1227 attr_init_APT(&truncmax, zapt, 13);
1228 truncmax_value = attr_find(&truncmax, NULL);
1229 if (truncmax_value != -1)
1230 grep_info->trunc_max = truncmax_value;
1235 int termset_value_numeric;
1236 const char *termset_value_string;
1238 attr_init_APT(&termset, zapt, 8);
1239 termset_value_numeric =
1240 attr_find_ex(&termset, NULL, &termset_value_string);
1241 if (termset_value_numeric != -1)
1244 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1248 const char *termset_name = 0;
1249 if (termset_value_numeric != -2)
1252 sprintf(resname, "%d", termset_value_numeric);
1253 termset_name = resname;
1256 termset_name = termset_value_string;
1257 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1258 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1259 if (!grep_info->termset)
1261 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1271 \brief Create result set(s) for list of terms
1272 \param zh Zebra Handle
1273 \param zapt Attributes Plus Term (RPN leaf)
1274 \param termz term as used in query but converted to UTF-8
1275 \param attributeSet default attribute set
1276 \param stream memory for result
1277 \param reg_type register type ('w', 'p',..)
1278 \param complete_flag whether it's phrases or not
1279 \param rank_type term flags for ranking
1280 \param xpath_use use attribute for X-Path (-1 for no X-path)
1281 \param num_bases number of databases
1282 \param basenames array of databases
1283 \param rset_nmem memory for result sets
1284 \param result_sets output result set for each term in list (output)
1285 \param num_result_sets number of output result sets
1286 \param kc rset key control to be used for created result sets
1288 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1289 Z_AttributesPlusTerm *zapt,
1291 const int *attributeSet,
1293 int reg_type, int complete_flag,
1294 const char *rank_type,
1295 const char *xpath_use,
1296 int num_bases, char **basenames,
1298 RSET **result_sets, int *num_result_sets,
1299 struct rset_key_control *kc)
1301 char term_dst[IT_MAX_WORD+1];
1302 struct grep_info grep_info;
1303 const char *termp = termz;
1306 *num_result_sets = 0;
1308 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1314 if (alloc_sets == *num_result_sets)
1317 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1320 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1321 alloc_sets = alloc_sets + add;
1322 *result_sets = rnew;
1324 res = term_trunc(zh, zapt, &termp, attributeSet,
1326 reg_type, complete_flag,
1327 num_bases, basenames,
1328 term_dst, rank_type,
1329 xpath_use, rset_nmem,
1330 &(*result_sets)[*num_result_sets],
1332 if (res != ZEBRA_OK)
1335 for (i = 0; i < *num_result_sets; i++)
1336 rset_delete((*result_sets)[i]);
1337 grep_info_delete (&grep_info);
1340 if ((*result_sets)[*num_result_sets] == 0)
1342 (*num_result_sets)++;
1347 grep_info_delete(&grep_info);
1351 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1352 Z_AttributesPlusTerm *zapt,
1353 const int *attributeSet,
1355 int num_bases, char **basenames,
1358 struct rset_key_control *kc)
1366 attr_init_APT(&position, zapt, 3);
1367 position_value = attr_find(&position, NULL);
1368 switch(position_value)
1377 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1382 if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, reg_type))
1384 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1389 if (!zh->reg->isamb && !zh->reg->isamc)
1391 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1395 f_set = xmalloc(sizeof(RSET) * num_bases);
1396 for (base_no = 0; base_no < num_bases; base_no++)
1400 char term_dict[100];
1405 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1407 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1408 basenames[base_no]);
1412 if (zebra_apt_get_ord(zh, zapt, reg_type, 0,
1413 attributeSet, &ord) != ZEBRA_OK)
1416 ord_len = key_SU_encode (ord, ord_buf);
1417 memcpy(term_dict, ord_buf, ord_len);
1418 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1419 val = dict_lookup(zh->reg->dict, term_dict);
1422 assert(*val == sizeof(ISAM_P));
1423 memcpy(&isam_p, val+1, sizeof(isam_p));
1427 f_set[num_sets++] = rsisamb_create(rset_nmem, kc, kc->scope,
1428 zh->reg->isamb, isam_p, 0);
1429 else if (zh->reg->isamc)
1430 f_set[num_sets++] = rsisamc_create(rset_nmem, kc, kc->scope,
1431 zh->reg->isamc, isam_p, 0);
1435 *rset = rset_create_or(rset_nmem, kc, kc->scope,
1436 0 /* termid */, num_sets, f_set);
1442 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1443 Z_AttributesPlusTerm *zapt,
1444 const char *termz_org,
1445 const int *attributeSet,
1447 int reg_type, int complete_flag,
1448 const char *rank_type,
1449 const char *xpath_use,
1450 int num_bases, char **basenames,
1453 struct rset_key_control *kc)
1455 RSET *result_sets = 0;
1456 int num_result_sets = 0;
1458 term_list_trunc(zh, zapt, termz_org, attributeSet,
1459 stream, reg_type, complete_flag,
1460 rank_type, xpath_use,
1461 num_bases, basenames,
1463 &result_sets, &num_result_sets, kc);
1465 if (res != ZEBRA_OK)
1468 if (num_result_sets > 0)
1471 res = rpn_search_APT_position(zh, zapt, attributeSet,
1473 num_bases, basenames,
1474 rset_nmem, &first_set,
1476 if (res != ZEBRA_OK)
1480 RSET *nsets = nmem_malloc(stream,
1481 sizeof(RSET) * (num_result_sets+1));
1482 nsets[0] = first_set;
1483 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1484 result_sets = nsets;
1488 if (num_result_sets == 0)
1489 *rset = rset_create_null(rset_nmem, kc, 0);
1490 else if (num_result_sets == 1)
1491 *rset = result_sets[0];
1493 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1494 num_result_sets, result_sets,
1495 1 /* ordered */, 0 /* exclusion */,
1496 3 /* relation */, 1 /* distance */);
1502 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1503 Z_AttributesPlusTerm *zapt,
1504 const char *termz_org,
1505 const int *attributeSet,
1507 int reg_type, int complete_flag,
1508 const char *rank_type,
1509 const char *xpath_use,
1510 int num_bases, char **basenames,
1513 struct rset_key_control *kc)
1515 RSET *result_sets = 0;
1516 int num_result_sets = 0;
1519 term_list_trunc(zh, zapt, termz_org, attributeSet,
1520 stream, reg_type, complete_flag,
1521 rank_type, xpath_use,
1522 num_bases, basenames,
1524 &result_sets, &num_result_sets, kc);
1525 if (res != ZEBRA_OK)
1528 for (i = 0; i<num_result_sets; i++)
1531 res = rpn_search_APT_position(zh, zapt, attributeSet,
1533 num_bases, basenames,
1534 rset_nmem, &first_set,
1536 if (res != ZEBRA_OK)
1538 for (i = 0; i<num_result_sets; i++)
1539 rset_delete(result_sets[i]);
1547 tmp_set[0] = first_set;
1548 tmp_set[1] = result_sets[i];
1550 result_sets[i] = rset_create_prox(
1551 rset_nmem, kc, kc->scope,
1553 1 /* ordered */, 0 /* exclusion */,
1554 3 /* relation */, 1 /* distance */);
1557 if (num_result_sets == 0)
1558 *rset = rset_create_null(rset_nmem, kc, 0);
1559 else if (num_result_sets == 1)
1560 *rset = result_sets[0];
1562 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1563 num_result_sets, result_sets);
1569 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1570 Z_AttributesPlusTerm *zapt,
1571 const char *termz_org,
1572 const int *attributeSet,
1574 int reg_type, int complete_flag,
1575 const char *rank_type,
1576 const char *xpath_use,
1577 int num_bases, char **basenames,
1580 struct rset_key_control *kc)
1582 RSET *result_sets = 0;
1583 int num_result_sets = 0;
1586 term_list_trunc(zh, zapt, termz_org, attributeSet,
1587 stream, reg_type, complete_flag,
1588 rank_type, xpath_use,
1589 num_bases, basenames,
1591 &result_sets, &num_result_sets,
1593 if (res != ZEBRA_OK)
1595 for (i = 0; i<num_result_sets; i++)
1598 res = rpn_search_APT_position(zh, zapt, attributeSet,
1600 num_bases, basenames,
1601 rset_nmem, &first_set,
1603 if (res != ZEBRA_OK)
1605 for (i = 0; i<num_result_sets; i++)
1606 rset_delete(result_sets[i]);
1614 tmp_set[0] = first_set;
1615 tmp_set[1] = result_sets[i];
1617 result_sets[i] = rset_create_prox(
1618 rset_nmem, kc, kc->scope,
1620 1 /* ordered */, 0 /* exclusion */,
1621 3 /* relation */, 1 /* distance */);
1626 if (num_result_sets == 0)
1627 *rset = rset_create_null(rset_nmem, kc, 0);
1628 else if (num_result_sets == 1)
1629 *rset = result_sets[0];
1631 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1632 num_result_sets, result_sets);
1638 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1639 const char **term_sub,
1641 const int *attributeSet,
1642 struct grep_info *grep_info,
1652 char *term_tmp = term_dict + strlen(term_dict);
1655 attr_init_APT(&relation, zapt, 2);
1656 relation_value = attr_find(&relation, NULL);
1658 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1660 switch (relation_value)
1663 yaz_log(log_level_rpn, "Relation <");
1664 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1667 term_value = atoi (term_tmp);
1668 gen_regular_rel(term_tmp, term_value-1, 1);
1671 yaz_log(log_level_rpn, "Relation <=");
1672 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1675 term_value = atoi (term_tmp);
1676 gen_regular_rel(term_tmp, term_value, 1);
1679 yaz_log(log_level_rpn, "Relation >=");
1680 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1683 term_value = atoi (term_tmp);
1684 gen_regular_rel(term_tmp, term_value, 0);
1687 yaz_log(log_level_rpn, "Relation >");
1688 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1691 term_value = atoi (term_tmp);
1692 gen_regular_rel(term_tmp, term_value+1, 0);
1696 yaz_log(log_level_rpn, "Relation =");
1697 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1700 term_value = atoi (term_tmp);
1701 sprintf(term_tmp, "(0*%d)", term_value);
1704 /* term_tmp untouched.. */
1705 while (**term_sub != '\0')
1709 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1712 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1713 r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1717 zebra_set_partial_result(zh);
1719 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1720 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1724 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1725 const char **term_sub,
1726 const int *attributeSet, NMEM stream,
1727 struct grep_info *grep_info,
1728 int reg_type, int complete_flag,
1729 int num_bases, char **basenames,
1731 const char *xpath_use,
1732 struct ord_list **ol)
1734 char term_dict[2*IT_MAX_WORD+2];
1737 struct rpn_char_map_info rcmi;
1739 int bases_ok = 0; /* no of databases with OK attribute */
1741 *ol = ord_list_create(stream);
1743 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1745 for (base_no = 0; base_no < num_bases; base_no++)
1747 int max_pos, prefix_len = 0;
1748 int relation_error = 0;
1749 int ord, ord_len, i;
1754 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1756 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1757 basenames[base_no]);
1761 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1762 attributeSet, &ord) != ZEBRA_OK)
1766 *ol = ord_list_append(stream, *ol, ord);
1768 ord_len = key_SU_encode (ord, ord_buf);
1770 term_dict[prefix_len++] = '(';
1771 for (i = 0; i < ord_len; i++)
1773 term_dict[prefix_len++] = 1;
1774 term_dict[prefix_len++] = ord_buf[i];
1776 term_dict[prefix_len++] = ')';
1777 term_dict[prefix_len] = '\0';
1779 if (!numeric_relation(zh, zapt, &termp, term_dict,
1780 attributeSet, grep_info, &max_pos, reg_type,
1781 term_dst, &relation_error))
1785 zebra_setError(zh, relation_error, 0);
1795 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Numeric search for one attributes-plus-term operand.
   numeric_term() is handed &termp (initially termz) and fills grep_info;
   rset_trunc() then turns grep_info's ISAM positions into one entry of
   result_sets.  At the end, *rset receives a null set when no entries
   were produced and the single entry when there is exactly one; the
   visible rset_create_and() call combines several entries.
   NOTE(review): the embedded line numbers skip (e.g. 1801 -> 1803), so
   some parameters, declarations, braces and returns are not shown here. */
1800 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1801 Z_AttributesPlusTerm *zapt,
1803 const int *attributeSet,
1805 int reg_type, int complete_flag,
1806 const char *rank_type,
1807 const char *xpath_use,
1808 int num_bases, char **basenames,
1811 struct rset_key_control *kc)
1813 char term_dst[IT_MAX_WORD+1];
1814 const char *termp = termz;
1815 RSET *result_sets = 0;
1816 int num_result_sets = 0;
1818 struct grep_info grep_info;
1820 zint hits_limit_value;
1821 const char *term_ref_id_str = 0;
/* term_limits_APT() is given the addresses of hits_limit_value and
   term_ref_id_str */
1823 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1825 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
/* the action taken when grep_info_prepare() fails is on a line not shown */
1826 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1830 struct ord_list *ol;
/* result_sets is full: a larger array is taken from the stream NMEM and
   the existing entries are copied into it */
1831 if (alloc_sets == num_result_sets)
1834 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1837 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1838 alloc_sets = alloc_sets + add;
1841 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* the ISAM position count is reset before each numeric_term() call */
1842 grep_info.isam_p_indx = 0;
1843 res = numeric_term(zh, zapt, &termp, attributeSet, stream, &grep_info,
1844 reg_type, complete_flag, num_bases, basenames,
1845 term_dst, xpath_use, &ol);
/* failure or a null termp is tested here; presumably this ends the
   loop, but the statement itself is not shown */
1846 if (res == ZEBRA_FAIL || termp == 0)
1848 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1849 result_sets[num_result_sets] =
1850 rset_trunc(zh, grep_info.isam_p_buf,
1851 grep_info.isam_p_indx, term_dst,
1852 strlen(term_dst), rank_type,
1853 0 /* preserve position */,
1854 zapt->term->which, rset_nmem,
1855 kc, kc->scope, ol, reg_type,
1858 if (!result_sets[num_result_sets])
1864 grep_info_delete(&grep_info);
1866 if (res != ZEBRA_OK)
/* choose the final set according to how many entries were collected */
1868 if (num_result_sets == 0)
1869 *rset = rset_create_null(rset_nmem, kc, 0);
1870 else if (num_result_sets == 1)
1871 *rset = result_sets[0];
1873 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1874 num_result_sets, result_sets);
/* "local" search: termz is converted with atozint() and the resulting
   sysno is passed to rec_get() on the register's record store.
   The visible lines assign *rset either a null set, or a temporary set
   (created with the "setTmpDir" resource) that is opened for writing and
   receives one key.
   NOTE(review): the conditions choosing between those two outcomes, the
   setup of key and the return statements are on lines not shown here. */
1880 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1881 Z_AttributesPlusTerm *zapt,
1883 const int *attributeSet,
1885 const char *rank_type, NMEM rset_nmem,
1887 struct rset_key_control *kc)
1890 zint sysno = atozint(termz);
1894 rec = rec_get(zh->reg->records, sysno);
1902 *rset = rset_create_null(rset_nmem, kc, 0);
1908 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1909 res_get(zh->res, "setTmpDir"), 0);
1910 rsfd = rset_open(*rset, RSETF_WRITE);
1915 rset_write(rsfd, &key);
/* Records a sort key described by an operand instead of searching.
   Attribute type 7 of zapt is read as the sort relation.  The term text
   is parsed as an index i, a Z_SortKeySpec is built from stream memory
   (generic sort element, sort attributes taken from zapt->attributes) and
   stored in sort_sequence->specs[i].  *rset is set to a null set.
   NOTE(review): the embedded line numbers skip, so some parameters,
   declarations, braces, else-branches and returns are not shown here. */
1921 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1922 const int *attributeSet, NMEM stream,
1923 Z_SortKeySpecList *sort_sequence,
1924 const char *rank_type,
1927 struct rset_key_control *kc)
1930 int sort_relation_value;
1931 AttrType sort_relation_type;
1936 attr_init_APT(&sort_relation_type, zapt, 7);
1937 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* first use: allocate a 10-entry specs array from stream and clear it */
1939 if (!sort_sequence->specs)
1941 sort_sequence->num_specs = 10;
1942 sort_sequence->specs = (Z_SortKeySpec **)
1943 nmem_malloc(stream, sort_sequence->num_specs *
1944 sizeof(*sort_sequence->specs));
1945 for (i = 0; i<sort_sequence->num_specs; i++)
1946 sort_sequence->specs[i] = 0;
/* what happens for a non-general term is on a line not shown */
1948 if (zapt->term->which != Z_Term_general)
1951 i = atoi_n ((char *) zapt->term->u.general->buf,
1952 zapt->term->u.general->len);
/* NOTE(review): only i >= num_specs is tested on the visible line;
   confirm that a negative i cannot reach specs[i] below */
1953 if (i >= sort_sequence->num_specs)
1955 sprintf(termz, "%d", i);
1957 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1958 sks->sortElement = (Z_SortElement *)
1959 nmem_malloc(stream, sizeof(*sks->sortElement));
1960 sks->sortElement->which = Z_SortElement_generic;
1961 sk = sks->sortElement->u.generic = (Z_SortKey *)
1962 nmem_malloc(stream, sizeof(*sk));
1963 sk->which = Z_SortKey_sortAttributes;
1964 sk->u.sortAttributes = (Z_SortAttributes *)
1965 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1967 sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
1968 sk->u.sortAttributes->list = zapt->attributes;
1970 sks->sortRelation = (int *)
1971 nmem_malloc(stream, sizeof(*sks->sortRelation));
/* relation value 1 selects ascending and 2 descending; the last
   assignment (ascending) presumably is the fallback for other values,
   its else line is not shown */
1972 if (sort_relation_value == 1)
1973 *sks->sortRelation = Z_SortKeySpec_ascending;
1974 else if (sort_relation_value == 2)
1975 *sks->sortRelation = Z_SortKeySpec_descending;
1977 *sks->sortRelation = Z_SortKeySpec_ascending;
1979 sks->caseSensitivity = (int *)
1980 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1981 *sks->caseSensitivity = 0;
1983 sks->which = Z_SortKeySpec_null;
1984 sks->u.null = odr_nullval ();
1985 sort_sequence->specs[i] = sks;
/* a sort operand contributes no hits of its own: hand back a null set */
1986 *rset = rset_create_null(rset_nmem, kc, 0);
/* Checks whether the operand's attribute type 1 carries an XPath string.
   attr_find_ex() is asked for attribute type 1 and may deliver a string
   through use_string.  When that string is present and starts with '/',
   it is parsed by zebra_parse_xpath_str() into at most max entries of
   xpath, and that function's result is returned.
   NOTE(review): the value returned when use_string is absent or does not
   start with '/' is on a line not shown here, as is the last parameter
   (mem) and the declaration of use. */
1991 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1992 const int *attributeSet,
1993 struct xpath_location_step *xpath, int max,
1996 const int *curAttributeSet = attributeSet;
1998 const char *use_string = 0;
2000 attr_init_APT(&use, zapt, 1);
2001 attr_find_ex(&use, &curAttributeSet, &use_string);
2003 if (!use_string || *use_string != '/')
2006 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/* Looks up one XPath-related term in the dictionary and returns its hits
   as an RSET.
   The attribute ordinal comes from zebraExplain_lookup_attr_str().  A
   pattern is assembled in term_dict: the key_SU_encode()d ordinal bytes,
   each preceded by a byte of value 1, closed by ')' and followed by term.
   dict_lookup_grep() runs that pattern with grep_handle collecting ISAM
   positions into grep_info, and rset_trunc() builds the result set from
   them.  A null set is returned when grep_info_prepare() fails.
   NOTE(review): the embedded line numbers skip; the lines between the
   '|' and '(' stores and the condition guarding the second null-set
   return are not shown, so how those are selected is not visible here. */
2011 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2012 int reg_type, const char *term,
2013 const char *xpath_use,
2015 struct rset_key_control *kc)
2018 struct grep_info grep_info;
2019 char term_dict[2048];
2022 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2023 zinfo_index_category_index,
2026 int ord_len, i, r, max_pos;
2027 int term_type = Z_Term_characterString;
2028 const char *flags = "void";
2030 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2031 return rset_create_null(rset_nmem, kc, 0);
2034 return rset_create_null(rset_nmem, kc, 0);
2036 term_dict[prefix_len++] = '|';
2038 term_dict[prefix_len++] = '(';
/* append the encoded ordinal, one marker byte (1) before each byte */
2040 ord_len = key_SU_encode (ord, ord_buf);
2041 for (i = 0; i<ord_len; i++)
2043 term_dict[prefix_len++] = 1;
2044 term_dict[prefix_len++] = ord_buf[i];
2046 term_dict[prefix_len++] = ')';
/* NOTE(review): strcpy() does not bound term against the 2048-byte
   term_dict; confirm callers keep term short enough */
2047 strcpy(term_dict+prefix_len, term);
2049 grep_info.isam_p_indx = 0;
2050 r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2051 &grep_info, &max_pos, 0, grep_handle);
2052 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2053 grep_info.isam_p_indx);
2054 rset = rset_trunc(zh, grep_info.isam_p_buf,
2055 grep_info.isam_p_indx, term, strlen(term),
2056 flags, 1, term_type,rset_nmem,
2057 kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
2058 0 /* term_ref_id_str */);
2059 grep_info_delete(&grep_info);
/* Restricts a term result set to the location given by an XPath.
   rset is the set to restrict; always_matches is 1 when it is null.
   For every database in basenames and for each step level counted down
   from xpath_len, a pattern xpath_rev is built from the step parts
   (visited from index level down to 1).  xpath_trunc() looks up the
   element-begin and element-end sets for that pattern, plus an attribute
   set when the step has a relation predicate with a name, and
   rset_create_between() combines them with the current rset.
   A database that zebraExplain_curDatabase() rejects sets
   YAZ_BIB1_DATABASE_UNAVAILABLE.
   NOTE(review): the embedded line numbers skip, so several parameters,
   declarations, loop headers, conditions and returns are not shown here;
   in particular the conditions around the two wrbuf_destroy(xpath_rev)
   calls and around "rset = rset_start_tag" are not visible. */
2064 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2065 int num_bases, char **basenames,
2066 NMEM stream, const char *rank_type, RSET rset,
2067 int xpath_len, struct xpath_location_step *xpath,
2070 struct rset_key_control *kc)
2074 int always_matches = rset ? 0 : 1;
2082 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2083 for (i = 0; i<xpath_len; i++)
2085 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
/* The next seven lines appear to be the text of an explanatory comment
   (XPath forms and the range() nesting built for them); its delimiters
   are on lines not shown here. */
2097 a[@attr = value]/b[@other = othervalue]
2099 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2100 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2101 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2102 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2103 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2104 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
/* dict_grep_cmap() is called with null map info and null handler */
2108 dict_grep_cmap (zh->reg->dict, 0, 0);
2110 for (base_no = 0; base_no < num_bases; base_no++)
2112 int level = xpath_len;
2115 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2117 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2118 basenames[base_no]);
2122 while (--level >= 0)
2124 WRBUF xpath_rev = wrbuf_alloc();
2126 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* build the pattern from step parts level, level-1, ..., 1 */
2128 for (i = level; i >= 1; --i)
2130 const char *cp = xpath[i].part;
2136 wrbuf_puts(xpath_rev, "[^/]*");
2137 else if (*cp == ' ')
2138 wrbuf_puts(xpath_rev, "\001 ");
2140 wrbuf_putc(xpath_rev, *cp);
2142 /* wrbuf_putc does not null-terminate, but
2143 wrbuf_puts below ensures it does.. so xpath_rev
2144 is OK iff length is > 0 */
2146 wrbuf_puts(xpath_rev, "/");
2148 else if (i == 1) /* // case */
2149 wrbuf_puts(xpath_rev, ".*");
/* a relation predicate with a non-empty name yields an attribute set:
   the pattern is the name without its first character, then '=' and
   the value, with characters found in REGEX_CHARS backslash-escaped */
2151 if (xpath[level].predicate &&
2152 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2153 xpath[level].predicate->u.relation.name[0])
2155 WRBUF wbuf = wrbuf_alloc();
2156 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2157 if (xpath[level].predicate->u.relation.value)
2159 const char *cp = xpath[level].predicate->u.relation.value;
2160 wrbuf_putc(wbuf, '=');
2164 if (strchr(REGEX_CHARS, *cp))
2165 wrbuf_putc(wbuf, '\\');
2166 wrbuf_putc(wbuf, *cp);
2170 rset_attr = xpath_trunc(
2171 zh, stream, '0', wrbuf_cstr(wbuf), ZEBRA_XPATH_ATTR_NAME,
2173 wrbuf_destroy(wbuf);
2179 wrbuf_destroy(xpath_rev);
2183 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2184 wrbuf_cstr(xpath_rev));
/* only a non-empty pattern is looked up */
2185 if (wrbuf_len(xpath_rev))
2187 rset_start_tag = xpath_trunc(zh, stream, '0',
2188 wrbuf_cstr(xpath_rev),
2189 ZEBRA_XPATH_ELM_BEGIN,
2192 rset = rset_start_tag;
2195 rset_end_tag = xpath_trunc(zh, stream, '0',
2196 wrbuf_cstr(xpath_rev),
2197 ZEBRA_XPATH_ELM_END,
2200 rset = rset_create_between(rset_nmem, kc, kc->scope,
2201 rset_start_tag, rset,
2202 rset_end_tag, rset_attr);
2205 wrbuf_destroy(xpath_rev);
/* size of the xpath step array declared in rpn_search_APT() */
2213 #define MAX_XPATH_STEPS 10

/* Evaluates one attributes-plus-term operand into *rset.
   zebra_maps_attr() supplies the register id, search type, rank type,
   complete flag and sort flag for zapt, and zapt_term_to_utf8() fills
   termz.  One visible path returns rpn_sort_spec() directly.  Otherwise
   rpn_check_xpath() is consulted: when the last XPath step starts with
   '@' the use attribute becomes ZEBRA_XPATH_ATTR_CDATA, else
   ZEBRA_XPATH_CDATA.  With relation value 103, *rset is cleared and
   rpn_search_xpath() is called without a term set.  For the remaining
   case the search type string selects rpn_search_APT_phrase, _and_list,
   _or_list, _local or _numeric, and the result is finally passed through
   rpn_search_xpath().
   NOTE(review): the embedded line numbers skip, so several parameters,
   declarations, conditions (e.g. the one guarding the rpn_sort_spec()
   return and the one guarding the YAZ_BIB1_UNSUPP_SEARCH error) and
   returns are not shown here.  The comment that opens on line 2281 has
   no visible closing delimiter for the same reason. */
2215 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2216 const int *attributeSet, NMEM stream,
2217 Z_SortKeySpecList *sort_sequence,
2218 int num_bases, char **basenames,
2221 struct rset_key_control *kc)
2223 ZEBRA_RES res = ZEBRA_OK;
2225 char *search_type = NULL;
2226 char rank_type[128];
2229 char termz[IT_MAX_WORD+1];
2231 const char *xpath_use = 0;
2232 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2236 log_level_rpn = yaz_log_module_level("rpn");
/* NOTE(review): the third argument below looks like a mis-encoded
   "&reg_id" (HTML-entity damage); confirm against the upstream file */
2239 zebra_maps_attr(zh->reg->zebra_maps, zapt, ®_id, &search_type,
2240 rank_type, &complete_flag, &sort_flag);
2242 yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2243 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2244 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2245 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2247 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2251 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2252 rank_type, rset_nmem, rset, kc);
2253 /* consider if an X-Path query is used */
2254 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2255 xpath, MAX_XPATH_STEPS, stream);
2258 if (xpath[xpath_len-1].part[0] == '@')
2259 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2261 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
/* attribute type 2 is read as the relation */
2268 attr_init_APT(&relation, zapt, 2);
2269 relation_value = attr_find(&relation, NULL);
2271 if (relation_value == 103) /* alwaysmatches */
2273 *rset = 0; /* signal no "term" set */
2274 return rpn_search_xpath(zh, num_bases, basenames,
2275 stream, rank_type, *rset,
2276 xpath_len, xpath, rset_nmem, rset, kc);
2281 /* search using one of the various search type strategies
2282 termz is our UTF-8 search term
2283 attributeSet is top-level default attribute set
2284 stream is ODR for search
2285 reg_id is the register type
2286 complete_flag is 1 for complete subfield, 0 for incomplete
2287 xpath_use is use-attribute to be used for X-Path search, 0 for none
2289 if (!strcmp(search_type, "phrase"))
2291 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2292 reg_id, complete_flag, rank_type,
2294 num_bases, basenames, rset_nmem,
2297 else if (!strcmp(search_type, "and-list"))
2299 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2300 reg_id, complete_flag, rank_type,
2302 num_bases, basenames, rset_nmem,
2305 else if (!strcmp(search_type, "or-list"))
2307 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2308 reg_id, complete_flag, rank_type,
2310 num_bases, basenames, rset_nmem,
2313 else if (!strcmp(search_type, "local"))
2315 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2316 rank_type, rset_nmem, rset, kc);
2318 else if (!strcmp(search_type, "numeric"))
2320 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2321 reg_id, complete_flag, rank_type,
2323 num_bases, basenames, rset_nmem,
2328 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2331 if (res != ZEBRA_OK)
2335 return rpn_search_xpath(zh, num_bases, basenames,
2336 stream, rank_type, *rset,
2337 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration: rpn_search_structure() is defined further down
   but is called by rpn_search_top() before that definition. */
2340 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2341 const int *attributeSet,
2342 NMEM stream, NMEM rset_nmem,
2343 Z_SortKeySpecList *sort_sequence,
2344 int num_bases, char **basenames,
2345 RSET **result_sets, int *num_result_sets,
2346 Z_Operator *parent_op,
2347 struct rset_key_control *kc);
/* Walks an RPN structure looking at attribute type 12 of its operands.
   A complex node recurses into s1 and then, while res is still ZEBRA_OK,
   into s2.  For a simple node holding an attributes-plus-term operand,
   attribute type 12 (held in global_hits_limit_attr) is looked up with
   attr_find() and the value stored in l.
   NOTE(review): the remaining parameters, the declaration and use of l,
   and the return statement are on lines not shown here. */
2349 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2352 ZEBRA_RES res = ZEBRA_OK;
2353 if (zs->which == Z_RPNStructure_complex)
2355 if (res == ZEBRA_OK)
2356 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2358 if (res == ZEBRA_OK)
2359 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2362 else if (zs->which == Z_RPNStructure_simple)
2364 if (zs->u.simple->which == Z_Operand_APT)
2366 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2367 AttrType global_hits_limit_attr;
2370 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2372 l = attr_find(&global_hits_limit_attr, NULL);
/* Top-level entry for evaluating an RPN query.
   A key control is created with zebra_key_control_create() and the whole
   structure is evaluated by rpn_search_structure() with no parent
   operator.  After the res != ZEBRA_OK test, every result set returned
   so far is deleted.  On the other visible path exactly one non-null
   result set is asserted and stored in *result_set.
   NOTE(review): the last parameters, the declarations of res and i, the
   release of kc (if any) and the return statements are on lines not
   shown here. */
2380 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2381 const int *attributeSet,
2382 NMEM stream, NMEM rset_nmem,
2383 Z_SortKeySpecList *sort_sequence,
2384 int num_bases, char **basenames,
2387 RSET *result_sets = 0;
2388 int num_result_sets = 0;
2390 struct rset_key_control *kc = zebra_key_control_create(zh);
2392 res = rpn_search_structure(zh, zs, attributeSet,
2395 num_bases, basenames,
2396 &result_sets, &num_result_sets,
2397 0 /* no parent op */,
2399 if (res != ZEBRA_OK)
2402 for (i = 0; i<num_result_sets; i++)
2403 rset_delete(result_sets[i]);
/* a root node always combines its children, so one set is expected */
2408 assert(num_result_sets == 1);
2409 assert(result_sets);
2410 assert(*result_sets);
2411 *result_set = *result_sets;
/* Recursively evaluates an RPN structure into a list of result sets.
   On entry *num_result_sets is reset to 0.
   Complex node: both operands (s1, s2) are evaluated; if either fails,
   the sets collected so far are deleted.  The two lists are concatenated
   into a new array from stream.  When there is no parent operator, the
   parent's operator differs from this one, or this operator is neither
   AND nor OR, the list is combined into a single set (and / or / and-not
   / prox) and returned as a one-element list.  Proximity is only
   accepted for Z_ProximityOperator_known with unit Z_ProxUnit_word;
   otherwise YAZ_BIB1_UNSUPP_PROX_UNIT_CODE is set.
   Simple node: an attributes-plus-term operand goes to rpn_search_APT(),
   a result set id goes to resultSetRef(); the resulting set is returned
   as a one-element list.
   NOTE(review): the embedded line numbers skip, so declarations, the
   switch header, some case labels, several arguments and all return
   statements are not shown here. */
2417 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2418 const int *attributeSet,
2419 NMEM stream, NMEM rset_nmem,
2420 Z_SortKeySpecList *sort_sequence,
2421 int num_bases, char **basenames,
2422 RSET **result_sets, int *num_result_sets,
2423 Z_Operator *parent_op,
2424 struct rset_key_control *kc)
2426 *num_result_sets = 0;
2427 if (zs->which == Z_RPNStructure_complex)
2430 Z_Operator *zop = zs->u.complex->roperator;
2431 RSET *result_sets_l = 0;
2432 int num_result_sets_l = 0;
2433 RSET *result_sets_r = 0;
2434 int num_result_sets_r = 0;
/* left operand */
2436 res = rpn_search_structure(zh, zs->u.complex->s1,
2437 attributeSet, stream, rset_nmem,
2439 num_bases, basenames,
2440 &result_sets_l, &num_result_sets_l,
2442 if (res != ZEBRA_OK)
2445 for (i = 0; i<num_result_sets_l; i++)
2446 rset_delete(result_sets_l[i]);
/* right operand; on failure the left sets are deleted as well */
2449 res = rpn_search_structure(zh, zs->u.complex->s2,
2450 attributeSet, stream, rset_nmem,
2452 num_bases, basenames,
2453 &result_sets_r, &num_result_sets_r,
2455 if (res != ZEBRA_OK)
2458 for (i = 0; i<num_result_sets_l; i++)
2459 rset_delete(result_sets_l[i]);
2460 for (i = 0; i<num_result_sets_r; i++)
2461 rset_delete(result_sets_r[i]);
2465 /* make a new list of result for all children */
2466 *num_result_sets = num_result_sets_l + num_result_sets_r;
2467 *result_sets = nmem_malloc(stream, *num_result_sets *
2468 sizeof(**result_sets));
2469 memcpy(*result_sets, result_sets_l,
2470 num_result_sets_l * sizeof(**result_sets));
2471 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2472 num_result_sets_r * sizeof(**result_sets));
/* an AND/OR node under a parent with the same operator skips this
   step and leaves its list for the parent to combine */
2474 if (!parent_op || parent_op->which != zop->which
2475 || (zop->which != Z_Operator_and &&
2476 zop->which != Z_Operator_or))
2478 /* parent node different from this one (or non-present) */
2479 /* we must combine result sets now */
2483 case Z_Operator_and:
2484 rset = rset_create_and(rset_nmem, kc,
2486 *num_result_sets, *result_sets);
2489 rset = rset_create_or(rset_nmem, kc,
2490 kc->scope, 0, /* termid */
2491 *num_result_sets, *result_sets);
2493 case Z_Operator_and_not:
2494 rset = rset_create_not(rset_nmem, kc,
2499 case Z_Operator_prox:
2500 if (zop->u.prox->which != Z_ProximityOperator_known)
2503 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2507 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2509 zebra_setError_zint(zh,
2510 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2511 *zop->u.prox->u.known);
/* exclusion is optional: a null pointer is passed on as 0 */
2516 rset = rset_create_prox(rset_nmem, kc,
2518 *num_result_sets, *result_sets,
2519 *zop->u.prox->ordered,
2520 (!zop->u.prox->exclusion ?
2521 0 : *zop->u.prox->exclusion),
2522 *zop->u.prox->relationType,
2523 *zop->u.prox->distance );
2527 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* replace the children list by the single combined set */
2530 *num_result_sets = 1;
2531 *result_sets = nmem_malloc(stream, *num_result_sets *
2532 sizeof(**result_sets));
2533 (*result_sets)[0] = rset;
2536 else if (zs->which == Z_RPNStructure_simple)
2541 if (zs->u.simple->which == Z_Operand_APT)
2543 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2544 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2545 attributeSet, stream, sort_sequence,
2546 num_bases, basenames, rset_nmem, &rset,
2548 if (res != ZEBRA_OK)
2551 else if (zs->u.simple->which == Z_Operand_resultSetId)
2553 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2554 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2558 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2559 zs->u.simple->u.resultSetId);
2566 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a simple operand yields a one-element list */
2569 *num_result_sets = 1;
2570 *result_sets = nmem_malloc(stream, *num_result_sets *
2571 sizeof(**result_sets));
2572 (*result_sets)[0] = rset;
2576 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2587 * indent-tabs-mode: nil
2589 * vim: shiftwidth=4 tabstop=8 expandtab