1 /* $Id: rpnsearch.c,v 1.7 2007-01-16 15:31:23 adam Exp $
2 Copyright (C) 1995-2007
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
43 #define TERMSET_DISABLE 1
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
47 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48 const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
52 const char *outp = *out;
53 yaz_log(YLOG_LOG, "---");
56 yaz_log(YLOG_LOG, "%02X", *outp);
64 void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
65 struct rpn_char_map_info *map_info)
67 map_info->zm = reg->zebra_maps;
68 map_info->reg_type = reg_type;
69 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
87 static int add_isam_p(const char *name, const char *info,
92 log_level_rpn = yaz_log_module_level("rpn");
95 /* we may have to stop this madness.. NOTE: -1 so that if
96 truncmax == trunxlimit we do *not* generate result sets */
97 if (p->isam_p_indx >= p->trunc_max - 1)
100 if (p->isam_p_indx == p->isam_p_size)
102 ISAM_P *new_isam_p_buf;
106 p->isam_p_size = 2*p->isam_p_size + 100;
107 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
111 memcpy(new_isam_p_buf, p->isam_p_buf,
112 p->isam_p_indx * sizeof(*p->isam_p_buf));
113 xfree(p->isam_p_buf);
115 p->isam_p_buf = new_isam_p_buf;
118 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
121 memcpy(new_term_no, p->isam_p_buf,
122 p->isam_p_indx * sizeof(*p->term_no));
125 p->term_no = new_term_no;
128 assert(*info == sizeof(*p->isam_p_buf));
129 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
134 char term_tmp[IT_MAX_WORD];
136 const char *index_name;
137 int len = key_SU_decode (&ord, (const unsigned char *) name);
139 zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len);
140 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
141 zebraExplain_lookup_ord(p->zh->reg->zei,
142 ord, 0 /* index_type */, &db, &index_name);
143 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
145 resultSetAddTerm(p->zh, p->termset, name[len], db,
146 index_name, term_tmp);
152 static int grep_handle(char *name, const char *info, void *p)
154 return add_isam_p(name, info, (struct grep_info *) p);
157 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
158 const char *ct1, const char *ct2, int first)
160 const char *s1, *s0 = *src;
163 /* skip white space */
166 if (ct1 && strchr(ct1, *s0))
168 if (ct2 && strchr(ct2, *s0))
171 map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
172 if (**map != *CHR_SPACE)
181 static void esc_str(char *out_buf, size_t out_size,
182 const char *in_buf, int in_size)
188 assert(out_size > 20);
190 for (k = 0; k<in_size; k++)
192 int c = in_buf[k] & 0xff;
194 if (c < 32 || c > 126)
198 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
199 if (strlen(out_buf) > out_size-20)
201 strcat(out_buf, "..");
207 #define REGEX_CHARS " []()|.*+?!"
209 /* term_100: handle term, where trunc = none(no operators at all) */
210 static int term_100(ZebraMaps zebra_maps, int reg_type,
211 const char **src, char *dst, int space_split,
219 const char *space_start = 0;
220 const char *space_end = 0;
222 if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
229 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
233 if (**map == *CHR_SPACE)
236 else /* complete subfield only. */
238 if (**map == *CHR_SPACE)
239 { /* save space mapping for later .. */
244 else if (space_start)
245 { /* reload last space */
246 while (space_start < space_end)
248 if (strchr(REGEX_CHARS, *space_start))
250 dst_term[j++] = *space_start;
251 dst[i++] = *space_start++;
254 space_start = space_end = 0;
257 /* add non-space char */
258 memcpy(dst_term+j, s1, s0 - s1);
264 if (strchr(REGEX_CHARS, *s1))
272 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
274 strcpy(dst + i, map[0]);
284 /* term_101: handle term, where trunc = Process # */
285 static int term_101(ZebraMaps zebra_maps, int reg_type,
286 const char **src, char *dst, int space_split,
294 if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
303 dst_term[j++] = *s0++;
309 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
311 if (space_split && **map == *CHR_SPACE)
314 /* add non-space char */
315 memcpy(dst_term+j, s1, s0 - s1);
321 if (strchr(REGEX_CHARS, *s1))
329 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
331 strcpy(dst + i, map[0]);
337 dst_term[j++] = '\0';
342 /* term_103: handle term, where trunc = re-2 (regular expressions) */
343 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
344 char *dst, int *errors, int space_split,
352 if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
355 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
356 isdigit(((const unsigned char *)s0)[1]))
358 *errors = s0[1] - '0';
365 if (strchr("^\\()[].*+?|-", *s0))
374 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
376 if (space_split && **map == *CHR_SPACE)
379 /* add non-space char */
380 memcpy(dst_term+j, s1, s0 - s1);
386 if (strchr(REGEX_CHARS, *s1))
394 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
396 strcpy(dst + i, map[0]);
408 /* term_102: handle term, where trunc = re-1 (regular expressions) */
409 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
410 char *dst, int space_split, char *dst_term)
412 return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
417 /* term_104: handle term, where trunc = Process ?, * and # */
418 static int term_104(ZebraMaps zebra_maps, int reg_type,
419 const char **src, char *dst, int space_split,
427 if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
434 dst_term[j++] = *s0++;
435 if (*s0 >= '0' && *s0 <= '9')
438 while (*s0 >= '0' && *s0 <= '9')
440 limit = limit * 10 + (*s0 - '0');
441 dst_term[j++] = *s0++;
461 dst_term[j++] = *s0++;
466 dst_term[j++] = *s0++;
472 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
474 if (space_split && **map == *CHR_SPACE)
477 /* add non-space char */
478 memcpy(dst_term+j, s1, s0 - s1);
484 if (strchr(REGEX_CHARS, *s1))
492 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
494 strcpy(dst + i, map[0]);
500 dst_term[j++] = '\0';
505 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
506 static int term_105(ZebraMaps zebra_maps, int reg_type,
507 const char **src, char *dst, int space_split,
508 char *dst_term, int right_truncate)
515 if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
524 dst_term[j++] = *s0++;
529 dst_term[j++] = *s0++;
535 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
537 if (space_split && **map == *CHR_SPACE)
540 /* add non-space char */
541 memcpy(dst_term+j, s1, s0 - s1);
547 if (strchr(REGEX_CHARS, *s1))
555 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
557 strcpy(dst + i, map[0]);
569 dst_term[j++] = '\0';
575 /* gen_regular_rel - generate regular expression from relation
576 * val: border value (inclusive)
577 * islt: 1 if <=; 0 if >=.
579 static void gen_regular_rel(char *dst, int val, int islt)
586 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
590 strcpy(dst, "(-[0-9]+|(");
598 strcpy(dst, "([0-9]+|-(");
610 sprintf(numstr, "%d", val);
611 for (w = strlen(numstr); --w >= 0; pos++)
630 strcpy(dst + dst_p, numstr);
631 dst_p = strlen(dst) - pos - 1;
659 for (i = 0; i<pos; i++)
672 /* match everything less than 10^(pos-1) */
674 for (i = 1; i<pos; i++)
675 strcat(dst, "[0-9]?");
679 /* match everything greater than 10^pos */
680 for (i = 0; i <= pos; i++)
681 strcat(dst, "[0-9]");
682 strcat(dst, "[0-9]*");
687 void string_rel_add_char(char **term_p, const char *src, int *indx)
689 if (src[*indx] == '\\')
690 *(*term_p)++ = src[(*indx)++];
691 *(*term_p)++ = src[(*indx)++];
695 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
696 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
697 * >= abc ([b-].*|a[c-].*|ab[c-].*)
698 * ([^-a].*|a[^-b].*|ab[c-].*)
699 * < abc ([-0].*|a[-a].*|ab[-b].*)
700 * ([^a-].*|a[^b-].*|ab[^c-].*)
701 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
702 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
704 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
705 const char **term_sub, char *term_dict,
706 oid_value attributeSet,
707 int reg_type, int space_split, char *term_dst,
713 char *term_tmp = term_dict + strlen(term_dict);
714 char term_component[2*IT_MAX_WORD+20];
716 attr_init_APT(&relation, zapt, 2);
717 relation_value = attr_find(&relation, NULL);
720 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
721 switch (relation_value)
724 if (!term_100(zh->reg->zebra_maps, reg_type,
725 term_sub, term_component,
726 space_split, term_dst))
728 yaz_log(log_level_rpn, "Relation <");
731 for (i = 0; term_component[i]; )
738 string_rel_add_char(&term_tmp, term_component, &j);
745 *term_tmp++ = FIRST_IN_FIELD_CHAR;
747 string_rel_add_char(&term_tmp, term_component, &i);
754 if ((term_tmp - term_dict) > IT_MAX_WORD)
759 yaz_log(YLOG_LOG, "term_dict=%s", term_dict);
762 if (!term_100(zh->reg->zebra_maps, reg_type,
763 term_sub, term_component,
764 space_split, term_dst))
766 yaz_log(log_level_rpn, "Relation <=");
769 for (i = 0; term_component[i]; )
774 string_rel_add_char(&term_tmp, term_component, &j);
780 *term_tmp++ = FIRST_IN_FIELD_CHAR;
782 string_rel_add_char(&term_tmp, term_component, &i);
791 if ((term_tmp - term_dict) > IT_MAX_WORD)
794 for (i = 0; term_component[i]; )
795 string_rel_add_char(&term_tmp, term_component, &i);
800 if (!term_100 (zh->reg->zebra_maps, reg_type,
801 term_sub, term_component, space_split, term_dst))
803 yaz_log(log_level_rpn, "Relation >");
806 for (i = 0; term_component[i];)
811 string_rel_add_char(&term_tmp, term_component, &j);
816 string_rel_add_char(&term_tmp, term_component, &i);
824 if ((term_tmp - term_dict) > IT_MAX_WORD)
827 for (i = 0; term_component[i];)
828 string_rel_add_char(&term_tmp, term_component, &i);
835 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
836 term_component, space_split, term_dst))
838 yaz_log(log_level_rpn, "Relation >=");
841 for (i = 0; term_component[i];)
848 string_rel_add_char(&term_tmp, term_component, &j);
851 if (term_component[i+1])
855 string_rel_add_char(&term_tmp, term_component, &i);
859 string_rel_add_char(&term_tmp, term_component, &i);
866 if ((term_tmp - term_dict) > IT_MAX_WORD)
877 yaz_log(log_level_rpn, "Relation =");
878 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
879 term_component, space_split, term_dst))
881 strcat(term_tmp, "(");
882 strcat(term_tmp, term_component);
883 strcat(term_tmp, ")");
886 yaz_log(log_level_rpn, "Relation always matches");
887 /* skip to end of term (we don't care what it is) */
888 while (**term_sub != '\0')
892 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
898 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
899 const char **term_sub,
900 oid_value attributeSet, NMEM stream,
901 struct grep_info *grep_info,
902 int reg_type, int complete_flag,
903 int num_bases, char **basenames,
905 const char *xpath_use,
906 struct ord_list **ol);
908 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
909 Z_AttributesPlusTerm *zapt,
910 zint *hits_limit_value,
911 const char **term_ref_id_str,
914 AttrType term_ref_id_attr;
915 AttrType hits_limit_attr;
918 attr_init_APT(&hits_limit_attr, zapt, 11);
919 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
921 attr_init_APT(&term_ref_id_attr, zapt, 10);
922 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
923 if (term_ref_id_int >= 0)
925 char *res = nmem_malloc(nmem, 20);
926 sprintf(res, "%d", term_ref_id_int);
927 *term_ref_id_str = res;
930 /* no limit given ? */
931 if (*hits_limit_value == -1)
933 if (*term_ref_id_str)
935 /* use global if term_ref is present */
936 *hits_limit_value = zh->approx_limit;
940 /* no counting if term_ref is not present */
941 *hits_limit_value = 0;
944 else if (*hits_limit_value == 0)
946 /* 0 is the same as global limit */
947 *hits_limit_value = zh->approx_limit;
949 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
950 *term_ref_id_str ? *term_ref_id_str : "none",
955 static ZEBRA_RES term_trunc(ZebraHandle zh,
956 Z_AttributesPlusTerm *zapt,
957 const char **term_sub,
958 oid_value attributeSet, NMEM stream,
959 struct grep_info *grep_info,
960 int reg_type, int complete_flag,
961 int num_bases, char **basenames,
963 const char *rank_type,
964 const char *xpath_use,
967 struct rset_key_control *kc)
971 zint hits_limit_value;
972 const char *term_ref_id_str = 0;
975 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
976 grep_info->isam_p_indx = 0;
977 res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
978 reg_type, complete_flag, num_bases, basenames,
979 term_dst, xpath_use, &ol);
982 if (!*term_sub) /* no more terms ? */
984 yaz_log(log_level_rpn, "term: %s", term_dst);
985 *rset = rset_trunc(zh, grep_info->isam_p_buf,
986 grep_info->isam_p_indx, term_dst,
987 strlen(term_dst), rank_type, 1 /* preserve pos */,
988 zapt->term->which, rset_nmem,
989 kc, kc->scope, ol, reg_type, hits_limit_value,
996 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
997 const char **term_sub,
998 oid_value attributeSet, NMEM stream,
999 struct grep_info *grep_info,
1000 int reg_type, int complete_flag,
1001 int num_bases, char **basenames,
1003 const char *xpath_use,
1004 struct ord_list **ol)
1006 char term_dict[2*IT_MAX_WORD+4000];
1008 AttrType truncation;
1009 int truncation_value;
1011 struct rpn_char_map_info rcmi;
1012 int space_split = complete_flag ? 0 : 1;
1014 int bases_ok = 0; /* no of databases with OK attribute */
1016 *ol = ord_list_create(stream);
1018 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1019 attr_init_APT(&truncation, zapt, 5);
1020 truncation_value = attr_find(&truncation, NULL);
1021 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1023 for (base_no = 0; base_no < num_bases; base_no++)
1026 int regex_range = 0;
1027 int max_pos, prefix_len = 0;
1032 termp = *term_sub; /* start of term for each database */
1034 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1036 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1037 basenames[base_no]);
1041 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1042 attributeSet, &ord) != ZEBRA_OK)
1047 *ol = ord_list_append(stream, *ol, ord);
1048 ord_len = key_SU_encode (ord, ord_buf);
1050 term_dict[prefix_len++] = '(';
1051 for (i = 0; i<ord_len; i++)
1053 term_dict[prefix_len++] = 1; /* our internal regexp escape char */
1054 term_dict[prefix_len++] = ord_buf[i];
1056 term_dict[prefix_len++] = ')';
1057 term_dict[prefix_len] = '\0';
1059 switch (truncation_value)
1061 case -1: /* not specified */
1062 case 100: /* do not truncate */
1063 if (!string_relation(zh, zapt, &termp, term_dict,
1065 reg_type, space_split, term_dst,
1070 zebra_setError(zh, relation_error, 0);
1077 case 1: /* right truncation */
1078 term_dict[j++] = '(';
1079 if (!term_100(zh->reg->zebra_maps, reg_type,
1080 &termp, term_dict + j, space_split, term_dst))
1085 strcat(term_dict, ".*)");
1087 case 2: /* left truncation */
1088 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1089 if (!term_100(zh->reg->zebra_maps, reg_type,
1090 &termp, term_dict + j, space_split, term_dst))
1095 strcat(term_dict, ")");
1097 case 3: /* left&right truncation */
1098 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1099 if (!term_100(zh->reg->zebra_maps, reg_type,
1100 &termp, term_dict + j, space_split, term_dst))
1105 strcat(term_dict, ".*)");
1107 case 101: /* process # in term */
1108 term_dict[j++] = '(';
1109 if (!term_101(zh->reg->zebra_maps, reg_type,
1110 &termp, term_dict + j, space_split, term_dst))
1115 strcat(term_dict, ")");
1117 case 102: /* Regexp-1 */
1118 term_dict[j++] = '(';
1119 if (!term_102(zh->reg->zebra_maps, reg_type,
1120 &termp, term_dict + j, space_split, term_dst))
1125 strcat(term_dict, ")");
1127 case 103: /* Regexp-2 */
1129 term_dict[j++] = '(';
1130 if (!term_103(zh->reg->zebra_maps, reg_type,
1131 &termp, term_dict + j, &regex_range,
1132 space_split, term_dst))
1137 strcat(term_dict, ")");
1139 case 104: /* process # and ! in term */
1140 term_dict[j++] = '(';
1141 if (!term_104(zh->reg->zebra_maps, reg_type,
1142 &termp, term_dict + j, space_split, term_dst))
1147 strcat(term_dict, ")");
1149 case 105: /* process * and ! in term, with right truncation */
1150 term_dict[j++] = '(';
1151 if (!term_105(zh->reg->zebra_maps, reg_type,
1152 &termp, term_dict + j, space_split, term_dst, 1))
1157 strcat(term_dict, ")");
1159 case 106: /* process * and ! in term, without right truncation */
1160 term_dict[j++] = '(';
1161 if (!term_105(zh->reg->zebra_maps, reg_type,
1162 &termp, term_dict + j, space_split, term_dst, 0))
1167 strcat(term_dict, ")");
1170 zebra_setError_zint(zh,
1171 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1178 const char *input = term_dict + prefix_len;
1179 esc_str(buf, sizeof(buf), input, strlen(input));
1181 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1182 r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1183 grep_info, &max_pos,
1184 ord_len /* number of "exact" chars */,
1187 zebra_set_partial_result(zh);
1189 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1194 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1200 static void grep_info_delete(struct grep_info *grep_info)
1203 xfree(grep_info->term_no);
1205 xfree(grep_info->isam_p_buf);
1208 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1209 Z_AttributesPlusTerm *zapt,
1210 struct grep_info *grep_info,
1214 int termset_value_numeric;
1215 const char *termset_value_string;
1218 grep_info->term_no = 0;
1220 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1221 grep_info->isam_p_size = 0;
1222 grep_info->isam_p_buf = NULL;
1224 grep_info->reg_type = reg_type;
1225 grep_info->termset = 0;
1228 attr_init_APT(&termset, zapt, 8);
1229 termset_value_numeric =
1230 attr_find_ex(&termset, NULL, &termset_value_string);
1231 if (termset_value_numeric != -1)
1234 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1238 const char *termset_name = 0;
1239 if (termset_value_numeric != -2)
1242 sprintf(resname, "%d", termset_value_numeric);
1243 termset_name = resname;
1246 termset_name = termset_value_string;
1247 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1248 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1249 if (!grep_info->termset)
1251 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1260 \brief Create result set(s) for list of terms
1261 \param zh Zebra Handle
1262 \param zapt Attributes Plus Term (RPN leaf)
1263 \param termz term as used in query but converted to UTF-8
1264 \param attributeSet default attribute set
1265 \param stream memory for result
1266 \param reg_type register type ('w', 'p',..)
1267 \param complete_flag whether it's phrases or not
1268 \param rank_type term flags for ranking
1269 \param xpath_use use attribute for X-Path (-1 for no X-path)
1270 \param num_bases number of databases
1271 \param basenames array of databases
1272 \param rset_nmem memory for result sets
1273 \param result_sets output result set for each term in list (output)
1274 \param num_result_sets number of output result sets
1275 \param kc rset key control to be used for created result sets
1277 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1278 Z_AttributesPlusTerm *zapt,
1280 oid_value attributeSet,
1282 int reg_type, int complete_flag,
1283 const char *rank_type,
1284 const char *xpath_use,
1285 int num_bases, char **basenames,
1287 RSET **result_sets, int *num_result_sets,
1288 struct rset_key_control *kc)
1290 char term_dst[IT_MAX_WORD+1];
1291 struct grep_info grep_info;
1292 const char *termp = termz;
1295 *num_result_sets = 0;
1297 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1303 if (alloc_sets == *num_result_sets)
1306 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1309 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1310 alloc_sets = alloc_sets + add;
1311 *result_sets = rnew;
1313 res = term_trunc(zh, zapt, &termp, attributeSet,
1315 reg_type, complete_flag,
1316 num_bases, basenames,
1317 term_dst, rank_type,
1318 xpath_use, rset_nmem,
1319 &(*result_sets)[*num_result_sets],
1321 if (res != ZEBRA_OK)
1324 for (i = 0; i < *num_result_sets; i++)
1325 rset_delete((*result_sets)[i]);
1326 grep_info_delete (&grep_info);
1329 if ((*result_sets)[*num_result_sets] == 0)
1331 (*num_result_sets)++;
1336 grep_info_delete(&grep_info);
1340 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1341 Z_AttributesPlusTerm *zapt,
1342 oid_value attributeSet,
1344 int num_bases, char **basenames,
1347 struct rset_key_control *kc)
1355 attr_init_APT(&position, zapt, 3);
1356 position_value = attr_find(&position, NULL);
1357 switch(position_value)
1366 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1371 if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, reg_type))
1373 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1378 if (!zh->reg->isamb && !zh->reg->isamc)
1380 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1384 f_set = xmalloc(sizeof(RSET) * num_bases);
1385 for (base_no = 0; base_no < num_bases; base_no++)
1389 char term_dict[100];
1394 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1396 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1397 basenames[base_no]);
1401 if (zebra_apt_get_ord(zh, zapt, reg_type, 0,
1402 attributeSet, &ord) != ZEBRA_OK)
1405 ord_len = key_SU_encode (ord, ord_buf);
1406 memcpy(term_dict, ord_buf, ord_len);
1407 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1408 val = dict_lookup(zh->reg->dict, term_dict);
1411 assert(*val == sizeof(ISAM_P));
1412 memcpy(&isam_p, val+1, sizeof(isam_p));
1416 f_set[num_sets++] = rsisamb_create(rset_nmem, kc, kc->scope,
1417 zh->reg->isamb, isam_p, 0);
1418 else if (zh->reg->isamc)
1419 f_set[num_sets++] = rsisamc_create(rset_nmem, kc, kc->scope,
1420 zh->reg->isamc, isam_p, 0);
1424 *rset = rset_create_or(rset_nmem, kc, kc->scope,
1425 0 /* termid */, num_sets, f_set);
1431 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1432 Z_AttributesPlusTerm *zapt,
1433 const char *termz_org,
1434 oid_value attributeSet,
1436 int reg_type, int complete_flag,
1437 const char *rank_type,
1438 const char *xpath_use,
1439 int num_bases, char **basenames,
1442 struct rset_key_control *kc)
1444 RSET *result_sets = 0;
1445 int num_result_sets = 0;
1447 term_list_trunc(zh, zapt, termz_org, attributeSet,
1448 stream, reg_type, complete_flag,
1449 rank_type, xpath_use,
1450 num_bases, basenames,
1452 &result_sets, &num_result_sets, kc);
1454 if (res != ZEBRA_OK)
1457 if (num_result_sets > 0)
1460 res = rpn_search_APT_position(zh, zapt, attributeSet,
1462 num_bases, basenames,
1463 rset_nmem, &first_set,
1465 if (res != ZEBRA_OK)
1469 RSET *nsets = nmem_malloc(stream,
1470 sizeof(RSET) * (num_result_sets+1));
1471 nsets[0] = first_set;
1472 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1473 result_sets = nsets;
1477 if (num_result_sets == 0)
1478 *rset = rset_create_null(rset_nmem, kc, 0);
1479 else if (num_result_sets == 1)
1480 *rset = result_sets[0];
1482 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1483 num_result_sets, result_sets,
1484 1 /* ordered */, 0 /* exclusion */,
1485 3 /* relation */, 1 /* distance */);
1491 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1492 Z_AttributesPlusTerm *zapt,
1493 const char *termz_org,
1494 oid_value attributeSet,
1496 int reg_type, int complete_flag,
1497 const char *rank_type,
1498 const char *xpath_use,
1499 int num_bases, char **basenames,
1502 struct rset_key_control *kc)
1504 RSET *result_sets = 0;
1505 int num_result_sets = 0;
1508 term_list_trunc(zh, zapt, termz_org, attributeSet,
1509 stream, reg_type, complete_flag,
1510 rank_type, xpath_use,
1511 num_bases, basenames,
1513 &result_sets, &num_result_sets, kc);
1514 if (res != ZEBRA_OK)
1517 for (i = 0; i<num_result_sets; i++)
1520 res = rpn_search_APT_position(zh, zapt, attributeSet,
1522 num_bases, basenames,
1523 rset_nmem, &first_set,
1525 if (res != ZEBRA_OK)
1527 for (i = 0; i<num_result_sets; i++)
1528 rset_delete(result_sets[i]);
1536 tmp_set[0] = first_set;
1537 tmp_set[1] = result_sets[i];
1539 result_sets[i] = rset_create_prox(
1540 rset_nmem, kc, kc->scope,
1542 1 /* ordered */, 0 /* exclusion */,
1543 3 /* relation */, 1 /* distance */);
1546 if (num_result_sets == 0)
1547 *rset = rset_create_null(rset_nmem, kc, 0);
1548 else if (num_result_sets == 1)
1549 *rset = result_sets[0];
1551 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1552 num_result_sets, result_sets);
1558 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1559 Z_AttributesPlusTerm *zapt,
1560 const char *termz_org,
1561 oid_value attributeSet,
1563 int reg_type, int complete_flag,
1564 const char *rank_type,
1565 const char *xpath_use,
1566 int num_bases, char **basenames,
1569 struct rset_key_control *kc)
1571 RSET *result_sets = 0;
1572 int num_result_sets = 0;
1575 term_list_trunc(zh, zapt, termz_org, attributeSet,
1576 stream, reg_type, complete_flag,
1577 rank_type, xpath_use,
1578 num_bases, basenames,
1580 &result_sets, &num_result_sets,
1582 if (res != ZEBRA_OK)
1584 for (i = 0; i<num_result_sets; i++)
1587 res = rpn_search_APT_position(zh, zapt, attributeSet,
1589 num_bases, basenames,
1590 rset_nmem, &first_set,
1592 if (res != ZEBRA_OK)
1594 for (i = 0; i<num_result_sets; i++)
1595 rset_delete(result_sets[i]);
1603 tmp_set[0] = first_set;
1604 tmp_set[1] = result_sets[i];
1606 result_sets[i] = rset_create_prox(
1607 rset_nmem, kc, kc->scope,
1609 1 /* ordered */, 0 /* exclusion */,
1610 3 /* relation */, 1 /* distance */);
1615 if (num_result_sets == 0)
1616 *rset = rset_create_null(rset_nmem, kc, 0);
1617 else if (num_result_sets == 1)
1618 *rset = result_sets[0];
1620 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1621 num_result_sets, result_sets);
1627 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1628 const char **term_sub,
1630 oid_value attributeSet,
1631 struct grep_info *grep_info,
1641 char *term_tmp = term_dict + strlen(term_dict);
1644 attr_init_APT(&relation, zapt, 2);
1645 relation_value = attr_find(&relation, NULL);
1647 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1649 switch (relation_value)
1652 yaz_log(log_level_rpn, "Relation <");
1653 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1656 term_value = atoi (term_tmp);
1657 gen_regular_rel(term_tmp, term_value-1, 1);
1660 yaz_log(log_level_rpn, "Relation <=");
1661 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1664 term_value = atoi (term_tmp);
1665 gen_regular_rel(term_tmp, term_value, 1);
1668 yaz_log(log_level_rpn, "Relation >=");
1669 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1672 term_value = atoi (term_tmp);
1673 gen_regular_rel(term_tmp, term_value, 0);
1676 yaz_log(log_level_rpn, "Relation >");
1677 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1680 term_value = atoi (term_tmp);
1681 gen_regular_rel(term_tmp, term_value+1, 0);
1685 yaz_log(log_level_rpn, "Relation =");
1686 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
1689 term_value = atoi (term_tmp);
1690 sprintf(term_tmp, "(0*%d)", term_value);
1693 /* term_tmp untouched.. */
1694 while (**term_sub != '\0')
1698 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1701 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1702 r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
1706 zebra_set_partial_result(zh);
1708 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1709 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1713 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1714 const char **term_sub,
1715 oid_value attributeSet, NMEM stream,
1716 struct grep_info *grep_info,
1717 int reg_type, int complete_flag,
1718 int num_bases, char **basenames,
1720 const char *xpath_use,
1721 struct ord_list **ol)
1723 char term_dict[2*IT_MAX_WORD+2];
1726 struct rpn_char_map_info rcmi;
1728 int bases_ok = 0; /* no of databases with OK attribute */
1730 *ol = ord_list_create(stream);
1732 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1734 for (base_no = 0; base_no < num_bases; base_no++)
1736 int max_pos, prefix_len = 0;
1737 int relation_error = 0;
1738 int ord, ord_len, i;
1743 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1745 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1746 basenames[base_no]);
1750 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1751 attributeSet, &ord) != ZEBRA_OK)
1755 *ol = ord_list_append(stream, *ol, ord);
1757 ord_len = key_SU_encode (ord, ord_buf);
1759 term_dict[prefix_len++] = '(';
1760 for (i = 0; i < ord_len; i++)
1762 term_dict[prefix_len++] = 1;
1763 term_dict[prefix_len++] = ord_buf[i];
1765 term_dict[prefix_len++] = ')';
1766 term_dict[prefix_len] = '\0';
1768 if (!numeric_relation(zh, zapt, &termp, term_dict,
1769 attributeSet, grep_info, &max_pos, reg_type,
1770 term_dst, &relation_error))
1774 zebra_setError(zh, relation_error, 0);
1784 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Search handler for an attributes-plus-term whose term is parsed by
 * numeric_term().
 *
 * Every numeric_term() call that succeeds is turned into one result set
 * with rset_trunc() and stored in result_sets[].  The collected sets are
 * finally combined into *rset:
 *   0 sets  -> null result set
 *   1 set   -> that set unchanged
 *   >1 sets -> AND of all sets (rset_create_and)
 *
 * NOTE(review): the loop header and the early-exit statements are not
 * visible here; the control flow described above is inferred from the
 * visible statements only - confirm against the full function.
 */
1789 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1790 Z_AttributesPlusTerm *zapt,
1792 oid_value attributeSet,
1794 int reg_type, int complete_flag,
1795 const char *rank_type,
1796 const char *xpath_use,
1797 int num_bases, char **basenames,
1800 struct rset_key_control *kc)
1802 char term_dst[IT_MAX_WORD+1];
1803 const char *termp = termz;
1804 RSET *result_sets = 0;
1805 int num_result_sets = 0;
1807 struct grep_info grep_info;
1809 zint hits_limit_value;
1810 const char *term_ref_id_str = 0;
/* term_limits_APT receives pointers to hits_limit_value and
   term_ref_id_str and is expected to fill them in from the APT */
1812 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1814 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1815 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1819 struct ord_list *ol;
/* result_sets[] is full: allocate a larger array on 'stream', copy the
   existing entries over and bump the capacity */
1820 if (alloc_sets == num_result_sets)
1823 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1826 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1827 alloc_sets = alloc_sets + add;
1830 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* reset the number of collected ISAM positions before each lookup */
1831 grep_info.isam_p_indx = 0;
1832 res = numeric_term(zh, zapt, &termp, attributeSet, stream, &grep_info,
1833 reg_type, complete_flag, num_bases, basenames,
1834 term_dst, xpath_use, &ol);
/* failure, or numeric_term left termp at 0 */
1835 if (res == ZEBRA_FAIL || termp == 0)
1837 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
/* build one result set from the ISAM positions gathered for this term */
1838 result_sets[num_result_sets] =
1839 rset_trunc(zh, grep_info.isam_p_buf,
1840 grep_info.isam_p_indx, term_dst,
1841 strlen(term_dst), rank_type,
1842 0 /* preserve position */,
1843 zapt->term->which, rset_nmem,
1844 kc, kc->scope, ol, reg_type,
1847 if (!result_sets[num_result_sets])
1853 grep_info_delete(&grep_info);
1855 if (res != ZEBRA_OK)
/* combine the per-term result sets into the single output set */
1857 if (num_result_sets == 0)
1858 *rset = rset_create_null(rset_nmem, kc, 0);
1859 else if (num_result_sets == 1)
1860 *rset = result_sets[0];
1862 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1863 num_result_sets, result_sets);
/* Search handler that produces a temporary result set.
 *
 * The visible statements create a temporary result set (passing the
 * "setTmpDir" resource value to rset_create_temp), open it for writing
 * and write a key into it.
 *
 * NOTE(review): how 'key' is populated and how the write handle is
 * closed is not visible here - confirm against the full function.
 */
1869 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1870 Z_AttributesPlusTerm *zapt,
1872 oid_value attributeSet,
1874 const char *rank_type, NMEM rset_nmem,
1876 struct rset_key_control *kc)
1881 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1882 res_get (zh->res, "setTmpDir"),0 );
1883 rsfd = rset_open(*rset, RSETF_WRITE);
1891 rset_write (rsfd, &key);
/* Turns an attributes-plus-term into an entry of sort_sequence instead of
 * performing a search.
 *
 * The term (which must be a general term) is parsed as an integer and
 * used as the slot index in sort_sequence->specs.  A Z_SortKeySpec is
 * allocated on 'stream', filled with the APT's attribute list and the
 * attribute-set OID, and stored in that slot.  *rset is set to a null
 * result set.
 *
 * Attribute type 7 selects the sort relation: 1 -> ascending,
 * 2 -> descending, anything else -> ascending.
 */
1896 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1897 oid_value attributeSet, NMEM stream,
1898 Z_SortKeySpecList *sort_sequence,
1899 const char *rank_type,
1902 struct rset_key_control *kc)
1905 int sort_relation_value;
1906 AttrType sort_relation_type;
1913 attr_init_APT(&sort_relation_type, zapt, 7);
1914 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* first use: allocate 10 spec slots and mark them all empty */
1916 if (!sort_sequence->specs)
1918 sort_sequence->num_specs = 10;
1919 sort_sequence->specs = (Z_SortKeySpec **)
1920 nmem_malloc(stream, sort_sequence->num_specs *
1921 sizeof(*sort_sequence->specs));
1922 for (i = 0; i<sort_sequence->num_specs; i++)
1923 sort_sequence->specs[i] = 0;
1925 if (zapt->term->which != Z_Term_general)
/* the term text itself is the index of the sort key slot */
1928 i = atoi_n ((char *) zapt->term->u.general->buf,
1929 zapt->term->u.general->len);
/* NOTE(review): only the upper bound is checked in the visible code; a
   negative i would index before specs[] below - confirm whether atoi_n
   can return a negative value here */
1930 if (i >= sort_sequence->num_specs)
1932 sprintf(termz, "%d", i);
/* resolve the attribute set to an OID for the sort attributes */
1934 oe.proto = PROTO_Z3950;
1935 oe.oclass = CLASS_ATTSET;
1936 oe.value = attributeSet;
1937 if (!oid_ent_to_oid (&oe, oid))
/* build the sort key spec: generic element -> sort attributes */
1940 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1941 sks->sortElement = (Z_SortElement *)
1942 nmem_malloc(stream, sizeof(*sks->sortElement));
1943 sks->sortElement->which = Z_SortElement_generic;
1944 sk = sks->sortElement->u.generic = (Z_SortKey *)
1945 nmem_malloc(stream, sizeof(*sk));
1946 sk->which = Z_SortKey_sortAttributes;
1947 sk->u.sortAttributes = (Z_SortAttributes *)
1948 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
1950 sk->u.sortAttributes->id = oid;
1951 sk->u.sortAttributes->list = zapt->attributes;
/* relation 2 sorts descending; 1 and every other value sort ascending */
1953 sks->sortRelation = (int *)
1954 nmem_malloc(stream, sizeof(*sks->sortRelation));
1955 if (sort_relation_value == 1)
1956 *sks->sortRelation = Z_SortKeySpec_ascending;
1957 else if (sort_relation_value == 2)
1958 *sks->sortRelation = Z_SortKeySpec_descending;
1960 *sks->sortRelation = Z_SortKeySpec_ascending;
1962 sks->caseSensitivity = (int *)
1963 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
1964 *sks->caseSensitivity = 0;
1966 sks->which = Z_SortKeySpec_null;
1967 sks->u.null = odr_nullval ();
1968 sort_sequence->specs[i] = sks;
/* a sort specification contributes no hits of its own */
1969 *rset = rset_create_null(rset_nmem, kc, 0);
/* Checks whether the use attribute (type 1) of the APT is an X-Path
 * expression and, if so, parses it.
 *
 * The string value of the use attribute is fetched with attr_find_ex().
 * When it exists and starts with '/', it is handed to
 * zebra_parse_xpath_str(), which fills at most 'max' entries of 'xpath';
 * that function's return value is returned unchanged.
 *
 * NOTE(review): the value returned when the attribute is missing or does
 * not start with '/' is not visible here - confirm against the full
 * function.
 */
1974 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1975 oid_value attributeSet,
1976 struct xpath_location_step *xpath, int max,
1979 oid_value curAttributeSet = attributeSet;
1981 const char *use_string = 0;
1983 attr_init_APT(&use, zapt, 1);
1984 attr_find_ex(&use, &curAttributeSet, &use_string);
/* only a string-valued use attribute beginning with '/' is an X-Path */
1986 if (!use_string || *use_string != '/')
1989 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/* Looks up 'term' as a dictionary pattern under the index named by
 * xpath_use and returns the matching positions as a result set.
 *
 * The dictionary pattern is "(" + encoded ordinal + ")" + term, where
 * each byte of the key_SU_encode()d ordinal is preceded by a byte with
 * value 1.  The hits collected by dict_lookup_grep() are turned into a
 * result set with rset_trunc() using the rank flags "void".
 *
 * A null result set is returned when grep_info_prepare() fails, and on
 * one further condition whose test is not visible here (presumably a
 * failed ordinal lookup - TODO confirm).
 */
1994 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
1995 int reg_type, const char *term,
1996 const char *xpath_use,
1998 struct rset_key_control *kc)
2001 struct grep_info grep_info;
2002 char term_dict[2048];
2005 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2006 zinfo_index_category_index,
2009 int ord_len, i, r, max_pos;
2010 int term_type = Z_Term_characterString;
2011 const char *flags = "void";
2013 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2014 return rset_create_null(rset_nmem, kc, 0);
2017 return rset_create_null(rset_nmem, kc, 0);
2019 term_dict[prefix_len++] = '|';
2021 term_dict[prefix_len++] = '(';
/* emit the encoded ordinal, each byte prefixed with a byte of value 1 */
2023 ord_len = key_SU_encode (ord, ord_buf);
2024 for (i = 0; i<ord_len; i++)
2026 term_dict[prefix_len++] = 1;
2027 term_dict[prefix_len++] = ord_buf[i];
2029 term_dict[prefix_len++] = ')';
/* NOTE(review): unbounded copy into the 2048-byte term_dict; no length
   check on 'term' is visible - confirm callers keep it short enough */
2030 strcpy(term_dict+prefix_len, term);
/* reset the number of collected ISAM positions before the lookup */
2032 grep_info.isam_p_indx = 0;
2033 r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2034 &grep_info, &max_pos, 0, grep_handle);
2035 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2036 grep_info.isam_p_indx);
2037 rset = rset_trunc(zh, grep_info.isam_p_buf,
2038 grep_info.isam_p_indx, term, strlen(term),
2039 flags, 1, term_type,rset_nmem,
2040 kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
2041 0 /* term_ref_id_str */);
2042 grep_info_delete(&grep_info);
/* Restricts a term result set to the parts of records selected by an
 * X-Path expression.
 *
 * 'rset' is the result set for the search term; a null 'rset' sets
 * always_matches, meaning there is no term set to wrap.  For every
 * database and for every X-Path step, from the last one backwards, the
 * step's path is written in reverse order as a dictionary pattern
 * (xpath_rev) and looked up with xpath_trunc() as an element-begin and an
 * element-end set.  The running set is then wrapped with
 * rset_create_between(start, rset, end, attr).  A relation predicate on
 * a step is looked up as "name=value" (with the first character of the
 * name skipped) and supplied as the attr set.
 *
 * NOTE(review): several branch conditions and the handling of the
 * always_matches case are not visible here; the flow described above is
 * inferred from the visible statements only - confirm against the full
 * function.
 */
2047 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2048 int num_bases, char **basenames,
2049 NMEM stream, const char *rank_type, RSET rset,
2050 int xpath_len, struct xpath_location_step *xpath,
2053 struct rset_key_control *kc)
2057 int always_matches = rset ? 0 : 1;
2065 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2066 for (i = 0; i<xpath_len; i++)
2068 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2080 a[@attr = value]/b[@other = othervalue]
2082 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2083 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2084 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2085 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2086 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2087 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
/* clear the dictionary character-map handler (both arguments 0) */
2091 dict_grep_cmap (zh->reg->dict, 0, 0);
2093 for (base_no = 0; base_no < num_bases; base_no++)
2095 int level = xpath_len;
2098 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2100 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2101 basenames[base_no]);
/* walk the X-Path steps from the last one back to the first */
2105 while (--level >= 0)
2107 WRBUF xpath_rev = wrbuf_alloc();
2109 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* build the reversed path for this level: step 'level' first, then
   its ancestors down to step 1, each followed by "/" */
2111 for (i = level; i >= 1; --i)
2113 const char *cp = xpath[i].part;
2119 wrbuf_puts(xpath_rev, "[^/]*");
2120 else if (*cp == ' ')
2121 wrbuf_puts(xpath_rev, "\001 ");
2123 wrbuf_putc(xpath_rev, *cp);
2125 /* wrbuf_putc does not null-terminate , but
2126 wrbuf_puts below ensures it does.. so xpath_rev
2127 is OK iff length is > 0 */
2129 wrbuf_puts(xpath_rev, "/");
2131 else if (i == 1) /* // case */
2132 wrbuf_puts(xpath_rev, ".*");
/* relation predicate on this step: look up "name=value" as an
   attribute set, escaping regex metacharacters in the value */
2134 if (xpath[level].predicate &&
2135 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2136 xpath[level].predicate->u.relation.name[0])
2138 WRBUF wbuf = wrbuf_alloc();
2139 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2140 if (xpath[level].predicate->u.relation.value)
2142 const char *cp = xpath[level].predicate->u.relation.value;
2143 wrbuf_putc(wbuf, '=');
2147 if (strchr(REGEX_CHARS, *cp))
2148 wrbuf_putc(wbuf, '\\');
2149 wrbuf_putc(wbuf, *cp);
/* appending "" makes sure the buffer is null-terminated after putc */
2153 wrbuf_puts(wbuf, "");
2154 rset_attr = xpath_trunc(
2155 zh, stream, '0', wrbuf_buf(wbuf), ZEBRA_XPATH_ATTR_NAME,
2157 wrbuf_free(wbuf, 1);
2163 wrbuf_free(xpath_rev, 1);
2167 yaz_log(log_level_rpn, "xpath_rev (%d) = %.*s", level,
2168 wrbuf_len(xpath_rev), wrbuf_buf(xpath_rev));
/* an empty pattern means there is nothing to look up at this level */
2169 if (wrbuf_len(xpath_rev))
2171 rset_start_tag = xpath_trunc(zh, stream, '0',
2172 wrbuf_buf(xpath_rev),
2173 ZEBRA_XPATH_ELM_BEGIN,
2176 rset = rset_start_tag;
2179 rset_end_tag = xpath_trunc(zh, stream, '0',
2180 wrbuf_buf(xpath_rev),
2181 ZEBRA_XPATH_ELM_END,
/* keep only hits lying between the element's begin and end tags */
2184 rset = rset_create_between(rset_nmem, kc, kc->scope,
2185 rset_start_tag, rset,
2186 rset_end_tag, rset_attr);
2189 wrbuf_free(xpath_rev, 1);
/* Capacity of the xpath[] step array that rpn_search_APT passes to
   rpn_check_xpath */
2197 #define MAX_XPATH_STEPS 10

/* Evaluates a single attributes-plus-term operand into a result set.
 *
 * Steps visible in this function:
 *  - zebra_maps_attr() derives the register id, search type, rank type,
 *    complete flag and sort flag from the APT;
 *  - the term is converted to UTF-8 in termz;
 *  - a sort request is delegated to rpn_sort_spec();
 *  - an X-Path use attribute is parsed with rpn_check_xpath(); when the
 *    last step starts with '@' the attribute-cdata index is searched,
 *    otherwise the cdata index;
 *  - relation attribute (type 2) value 103 ("alwaysmatches") skips the
 *    term search and goes straight to rpn_search_xpath() with no term set;
 *  - otherwise the search type string ("phrase", "and-list", "or-list",
 *    "local", "numeric") selects the handler, any other value reports
 *    YAZ_BIB1_UNSUPP_SEARCH, and the handler's result is passed through
 *    rpn_search_xpath().
 *
 * NOTE(review): the conditions guarding the sort and X-Path branches are
 * not visible here - confirm against the full function.
 */
2199 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2200 oid_value attributeSet, NMEM stream,
2201 Z_SortKeySpecList *sort_sequence,
2202 int num_bases, char **basenames,
2205 struct rset_key_control *kc)
2207 ZEBRA_RES res = ZEBRA_OK;
2209 char *search_type = NULL;
2210 char rank_type[128];
2213 char termz[IT_MAX_WORD+1];
2215 const char *xpath_use = 0;
2216 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2220 log_level_rpn = yaz_log_module_level("rpn");
/* NOTE(review): the third argument below looks like a mis-encoded
   address-of expression for reg_id - confirm against the original file */
2223 zebra_maps_attr(zh->reg->zebra_maps, zapt, ®_id, &search_type,
2224 rank_type, &complete_flag, &sort_flag);
2226 yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2227 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2228 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2229 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2231 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2235 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2236 rank_type, rset_nmem, rset, kc);
2237 /* consider if an X-Path query is used */
2238 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2239 xpath, MAX_XPATH_STEPS, stream);
2242 if (xpath[xpath_len-1].part[0] == '@')
2243 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2245 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
/* attribute type 2 is the relation attribute */
2252 attr_init_APT(&relation, zapt, 2);
2253 relation_value = attr_find(&relation, NULL);
2255 if (relation_value == 103) /* alwaysmatches */
2257 *rset = 0; /* signal no "term" set */
2258 return rpn_search_xpath(zh, num_bases, basenames,
2259 stream, rank_type, *rset,
2260 xpath_len, xpath, rset_nmem, rset, kc);
2265 /* search using one of the various search type strategies
2266 termz is our UTF-8 search term
2267 attributeSet is top-level default attribute set
2268 stream is ODR for search
2269 reg_id is the register type
2270 complete_flag is 1 for complete subfield, 0 for incomplete
2271 xpath_use is use-attribute to be used for X-Path search, 0 for none
2273 if (!strcmp(search_type, "phrase"))
2275 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2276 reg_id, complete_flag, rank_type,
2278 num_bases, basenames, rset_nmem,
2281 else if (!strcmp(search_type, "and-list"))
2283 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2284 reg_id, complete_flag, rank_type,
2286 num_bases, basenames, rset_nmem,
2289 else if (!strcmp(search_type, "or-list"))
2291 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2292 reg_id, complete_flag, rank_type,
2294 num_bases, basenames, rset_nmem,
2297 else if (!strcmp(search_type, "local"))
2299 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2300 rank_type, rset_nmem, rset, kc);
2302 else if (!strcmp(search_type, "numeric"))
2304 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2305 reg_id, complete_flag, rank_type,
2307 num_bases, basenames, rset_nmem,
2312 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2315 if (res != ZEBRA_OK)
2319 return rpn_search_xpath(zh, num_bases, basenames,
2320 stream, rank_type, *rset,
2321 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration: rpn_search_structure is defined further down in
   this file, after rpn_search_top which calls it. */
2324 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2325 oid_value attributeSet,
2326 NMEM stream, NMEM rset_nmem,
2327 Z_SortKeySpecList *sort_sequence,
2328 int num_bases, char **basenames,
2329 RSET **result_sets, int *num_result_sets,
2330 Z_Operator *parent_op,
2331 struct rset_key_control *kc);
/* Walks an RPN query tree looking at attribute type 12 of every
 * attributes-plus-term operand.
 *
 * Complex nodes are handled by recursing into s1 and then, if that
 * returned ZEBRA_OK, into s2.  For a simple APT operand the value of
 * attribute type 12 is read into 'l'.
 *
 * NOTE(review): what is done with 'l' (presumably it is stored as the
 * approximation limit through an output parameter) is not visible here -
 * confirm against the full function.
 */
2333 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2336 ZEBRA_RES res = ZEBRA_OK;
2337 if (zs->which == Z_RPNStructure_complex)
2339 if (res == ZEBRA_OK)
2340 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2342 if (res == ZEBRA_OK)
2343 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2346 else if (zs->which == Z_RPNStructure_simple)
2348 if (zs->u.simple->which == Z_Operand_APT)
2350 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2351 AttrType global_hits_limit_attr;
2354 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2356 l = attr_find(&global_hits_limit_attr, NULL);
/* Entry point for evaluating a whole RPN query tree.
 *
 * Creates a key control block, evaluates the tree with
 * rpn_search_structure() (no parent operator) and stores the single
 * resulting set in *result_set.  When the evaluation does not return
 * ZEBRA_OK, every result set produced so far is deleted.
 *
 * On success exactly one non-null result set is expected; this is
 * enforced with assert().
 */
2364 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2365 oid_value attributeSet,
2366 NMEM stream, NMEM rset_nmem,
2367 Z_SortKeySpecList *sort_sequence,
2368 int num_bases, char **basenames,
2371 RSET *result_sets = 0;
2372 int num_result_sets = 0;
2374 struct rset_key_control *kc = zebra_key_control_create(zh);
2376 res = rpn_search_structure(zh, zs, attributeSet,
2379 num_bases, basenames,
2380 &result_sets, &num_result_sets,
2381 0 /* no parent op */,
/* failure: release whatever result sets were already built */
2383 if (res != ZEBRA_OK)
2386 for (i = 0; i<num_result_sets; i++)
2387 rset_delete(result_sets[i]);
/* the top-level node has no parent, so it must yield exactly one set */
2392 assert(num_result_sets == 1);
2393 assert(result_sets);
2394 assert(*result_sets);
2395 *result_set = *result_sets;
/* Recursively evaluates one node of an RPN query tree.
 *
 * On return *result_sets points to an array (allocated on 'stream') of
 * *num_result_sets result sets.
 *
 * Complex node: both children are evaluated with this node's operator as
 * their parent_op, and their result set lists are concatenated.  If this
 * node has no parent, its operator differs from the parent's, or the
 * operator is neither AND nor OR, the list is combined into a single set
 * right away (and / or / and-not / prox).  Otherwise the concatenated
 * list is returned as-is, leaving the combining to the parent node.
 *
 * Simple node: an APT operand is evaluated with rpn_search_APT(); a
 * result-set reference is resolved with resultSetRef().  The single
 * resulting set is returned in a one-element array.
 *
 * Errors are reported through zebra_setError(): unsupported proximity
 * unit, unsupported operator, unknown result set id, unsupported search.
 * When a child evaluation fails, the result sets collected so far are
 * deleted.
 *
 * NOTE(review): the return statements and some case labels are not
 * visible here - confirm against the full function.
 */
2401 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2402 oid_value attributeSet,
2403 NMEM stream, NMEM rset_nmem,
2404 Z_SortKeySpecList *sort_sequence,
2405 int num_bases, char **basenames,
2406 RSET **result_sets, int *num_result_sets,
2407 Z_Operator *parent_op,
2408 struct rset_key_control *kc)
2410 *num_result_sets = 0;
2411 if (zs->which == Z_RPNStructure_complex)
2414 Z_Operator *zop = zs->u.complex->roperator;
2415 RSET *result_sets_l = 0;
2416 int num_result_sets_l = 0;
2417 RSET *result_sets_r = 0;
2418 int num_result_sets_r = 0;
/* evaluate the left child */
2420 res = rpn_search_structure(zh, zs->u.complex->s1,
2421 attributeSet, stream, rset_nmem,
2423 num_bases, basenames,
2424 &result_sets_l, &num_result_sets_l,
2426 if (res != ZEBRA_OK)
2429 for (i = 0; i<num_result_sets_l; i++)
2430 rset_delete(result_sets_l[i]);
/* evaluate the right child */
2433 res = rpn_search_structure(zh, zs->u.complex->s2,
2434 attributeSet, stream, rset_nmem,
2436 num_bases, basenames,
2437 &result_sets_r, &num_result_sets_r,
/* right child failed: drop the sets of both children */
2439 if (res != ZEBRA_OK)
2442 for (i = 0; i<num_result_sets_l; i++)
2443 rset_delete(result_sets_l[i]);
2444 for (i = 0; i<num_result_sets_r; i++)
2445 rset_delete(result_sets_r[i]);
2449 /* make a new list of result for all children */
2450 *num_result_sets = num_result_sets_l + num_result_sets_r;
2451 *result_sets = nmem_malloc(stream, *num_result_sets *
2452 sizeof(**result_sets));
2453 memcpy(*result_sets, result_sets_l,
2454 num_result_sets_l * sizeof(**result_sets));
2455 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2456 num_result_sets_r * sizeof(**result_sets));
2458 if (!parent_op || parent_op->which != zop->which
2459 || (zop->which != Z_Operator_and &&
2460 zop->which != Z_Operator_or))
2462 /* parent node different from this one (or non-present) */
2463 /* we must combine result sets now */
2467 case Z_Operator_and:
2468 rset = rset_create_and(rset_nmem, kc,
2470 *num_result_sets, *result_sets);
2473 rset = rset_create_or(rset_nmem, kc,
2474 kc->scope, 0, /* termid */
2475 *num_result_sets, *result_sets);
2477 case Z_Operator_and_not:
2478 rset = rset_create_not(rset_nmem, kc,
/* proximity: only a known unit code equal to Z_ProxUnit_word is
   accepted, anything else is reported as unsupported */
2483 case Z_Operator_prox:
2484 if (zop->u.prox->which != Z_ProximityOperator_known)
2487 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2491 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2493 zebra_setError_zint(zh,
2494 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2495 *zop->u.prox->u.known);
2500 rset = rset_create_prox(rset_nmem, kc,
2502 *num_result_sets, *result_sets,
2503 *zop->u.prox->ordered,
2504 (!zop->u.prox->exclusion ?
2505 0 : *zop->u.prox->exclusion),
2506 *zop->u.prox->relationType,
2507 *zop->u.prox->distance );
2511 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* the combined set replaces the children's list */
2514 *num_result_sets = 1;
2515 *result_sets = nmem_malloc(stream, *num_result_sets *
2516 sizeof(**result_sets));
2517 (*result_sets)[0] = rset;
2520 else if (zs->which == Z_RPNStructure_simple)
2525 if (zs->u.simple->which == Z_Operand_APT)
2527 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2528 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2529 attributeSet, stream, sort_sequence,
2530 num_bases, basenames, rset_nmem, &rset,
2532 if (res != ZEBRA_OK)
2535 else if (zs->u.simple->which == Z_Operand_resultSetId)
2537 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2538 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2542 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2543 zs->u.simple->u.resultSetId);
2550 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a simple operand always yields a one-element list */
2553 *num_result_sets = 1;
2554 *result_sets = nmem_malloc(stream, *num_result_sets *
2555 sizeof(**result_sets));
2556 (*result_sets)[0] = rset;
2560 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2571 * indent-tabs-mode: nil
2573 * vim: shiftwidth=4 tabstop=8 expandtab