1 /* $Id: zrpn.c,v 1.178 2005-04-25 21:40:34 adam Exp $
2 Copyright (C) 1995-2005
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
33 #include <zebra_xpath.h>
/* Key-control descriptor for keys of type struct it_key.  Only some of the
   initializer fields are visible in this listing. */
38 static const struct key_control it_ctrl =
40 sizeof(struct it_key),
41 2, /* we have sysnos and seqnos in this key, nothing more */
43 key_logdump_txt, /* FIXME - clean up these functions */
/* Non-static pointer to the descriptor above; it is handed to rset_trunc()
   and the rs*_create() calls further down in this file. */
48 const struct key_control *key_it_ctrl = &it_ctrl;
/* Context handed to rpn_char_map_handler(); that handler reads the members
   zm and reg_type, whose declarations are not visible in this listing. */
50 struct rpn_char_map_info
/* NOTE(review): the listing jumps from line 50 to 61, so this member most
   likely belongs to a different declaration (attr_find_ex() reads
   src->zapt on an AttrType) -- confirm against the full file. */
61 Z_AttributesPlusTerm *zapt;
/* Cached yaz_log level for the "rpn" module.  log_level_rpn is assigned in
   add_isam_p() from yaz_log_module_level("rpn"); log_level_set is
   presumably the "already initialised" flag -- its use is not visible here. */
65 static int log_level_set = 0;
66 static int log_level_rpn = 0;
/* Character-map callback installed by rpn_char_map_prepare() through
   dict_grep_cmap().
   vp   : the struct rpn_char_map_info registered with the dictionary.
   from : in/out cursor into the input, passed on to zebra_maps_input().
   len  : number of input bytes available.
   The mapping result is obtained from zebra_maps_input(); the return
   statement is not visible in this listing (presumably `out`). */
68 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
70 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
71 const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
/* The lines below log the mapped bytes in hex; the guard around them is
   not visible here (presumably a debug-only section -- confirm). */
75 const char *outp = *out;
76 yaz_log(YLOG_LOG, "---");
79 yaz_log(YLOG_LOG, "%02X", *outp);
/* Fill in map_info from the register and install rpn_char_map_handler()
   as the character-map callback of the register's dictionary.
   reg      : register supplying zebra_maps and dict.
   reg_type : register type stored for later zebra_maps_input() calls.
   map_info : caller-owned storage; the dictionary keeps the pointer, so it
              must stay valid for as long as the callback may be invoked. */
87 static void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
88 struct rpn_char_map_info *map_info)
90 map_info->zm = reg->zebra_maps;
91 map_info->reg_type = reg_type;
92 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
/* Find the next attribute of type src->type in src->zapt->attributes,
   resuming at element index src->major (and, for complex values, list
   index src->minor).
   attributeSetP : if non-NULL and the element names an attribute set, the
                   set's oid value is stored here.
   string_value  : see attr_find(), which passes 0 here; the store through
                   this pointer is not visible in this listing.
   Returns the numeric attribute value when one is found.  Callers in this
   file compare the result against -1 (no attribute) and -2 (string-valued
   attribute); the corresponding return statements are not visible here. */
95 static int attr_find_ex(AttrType *src, oid_value *attributeSetP,
96 const char **string_value)
100 num_attributes = src->zapt->attributes->num_attributes;
101 while (src->major < num_attributes)
103 Z_AttributeElement *element;
105 element = src->zapt->attributes->attributes[src->major];
106 if (src->type == *element->attributeType)
108 switch (element->which)
110 case Z_AttributeValue_numeric:
112 if (element->attributeSet && attributeSetP)
116 attrset = oid_getentbyoid(element->attributeSet);
117 *attributeSetP = attrset->value;
119 return *element->value.numeric;
121 case Z_AttributeValue_complex:
/* Stop condition for the complex list: src->minor has walked past it. */
122 if (src->minor >= element->value.complex->num_list)
124 if (element->attributeSet && attributeSetP)
128 attrset = oid_getentbyoid(element->attributeSet);
129 *attributeSetP = attrset->value;
131 if (element->value.complex->list[src->minor]->which ==
132 Z_StringOrNumeric_numeric)
/* Indexing with minor-1 implies src->minor was advanced on a line not
   shown in this listing -- confirm. */
136 *element->value.complex->list[src->minor-1]->u.numeric;
138 else if (element->value.complex->list[src->minor]->which ==
139 Z_StringOrNumeric_string)
145 element->value.complex->list[src->minor-1]->u.string;
/* Convenience wrapper around attr_find_ex() for callers that do not want
   a string value: passes 0 for string_value and returns its result. */
159 static int attr_find(AttrType *src, oid_value *attributeSetP)
161 return attr_find_ex(src, attributeSetP, 0);
164 static void attr_init(AttrType *src, Z_AttributesPlusTerm *zapt,
/* Convert src back through zebra_maps_output() into dst.
   Output is capped at IT_MAX_WORD-1 bytes, so dst must hold at least
   IT_MAX_WORD bytes (add_isam_p() passes a char[IT_MAX_WORD]).  The
   termination of dst is not visible in this listing. */
187 static void term_untrans(ZebraHandle zh, int reg_type,
188 char *dst, const char *src)
193 const char *cp = zebra_maps_output(zh->reg->zebra_maps,
/* No mapping for this position: a raw byte is presumably copied instead. */
195 if (!cp && len < IT_MAX_WORD-1)
198 while (*cp && len < IT_MAX_WORD-1)
/* Append one dictionary hit to the grep_info result arrays.
   name : dictionary key; decoded below as an SU code, one separator byte
          (name[len]) and the term text.
   info : dictionary payload; info[0] must equal sizeof(ISAM_P) and the
          ISAM_P value follows it.
   The third parameter (used as `p`, a struct grep_info *) is declared on a
   line not visible in this listing. */
204 static void add_isam_p(const char *name, const char *info,
209 log_level_rpn = yaz_log_module_level("rpn");
/* Buffer full: grow to 2*size+100 entries and copy the old contents. */
212 if (p->isam_p_indx == p->isam_p_size)
214 ISAM_P *new_isam_p_buf;
218 p->isam_p_size = 2*p->isam_p_size + 100;
219 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
223 memcpy(new_isam_p_buf, p->isam_p_buf,
224 p->isam_p_indx * sizeof(*p->isam_p_buf));
225 xfree(p->isam_p_buf);
227 p->isam_p_buf = new_isam_p_buf;
230 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* NOTE(review): the source of this copy is p->isam_p_buf although the
   destination is the new term_no array and the length is computed from
   sizeof(*p->term_no).  This looks like it should copy from p->term_no;
   as written the old term numbers would be lost -- confirm and fix. */
233 memcpy(new_term_no, p->isam_p_buf,
234 p->isam_p_indx * sizeof(*p->term_no));
237 p->term_no = new_term_no;
240 assert(*info == sizeof(*p->isam_p_buf));
241 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
/* Decode the key for logging and, via resultSetAddTerm(), for the term
   set; the condition guarding this part is not visible here. */
248 char term_tmp[IT_MAX_WORD];
250 int len = key_SU_decode (&su_code, name);
252 term_untrans (p->zh, p->reg_type, term_tmp, name+len+1);
253 yaz_log(log_level_rpn, "grep: %d %c %s", su_code, name[len], term_tmp);
254 zebraExplain_lookup_ord (p->zh->reg->zei,
255 su_code, &db, &set, &use);
256 yaz_log(log_level_rpn, "grep: set=%d use=%d db=%s", set, use, db);
258 resultSetAddTerm(p->zh, p->termset, name[len], db,
/* Grep callback with the (name, info, client data) signature: forwards
   each hit to add_isam_p(), treating p as a struct grep_info *.  The
   return value is not visible in this listing. */
265 static int grep_handle(char *name, const char *info, void *p)
267 add_isam_p(name, info, (struct grep_info *) p);
/* Skip leading white space in *src before a term is parsed.
   ct1, ct2 : optional sets of characters that are tested with strchr()
              before the character map is consulted (each may be NULL).
   first    : passed through to zebra_maps_input().
   Callers test the result for zero to mean "no term left"; the return
   statement itself is not visible in this listing. */
271 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
272 const char *ct1, const char *ct2, int first)
274 const char *s1, *s0 = *src;
277 /* skip white space */
280 if (ct1 && strchr(ct1, *s0))
282 if (ct2 && strchr(ct2, *s0))
285 map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
/* A mapping other than CHR_SPACE ends the white-space run. */
286 if (**map != *CHR_SPACE)
/* Render in_buf[0..in_size) into out_buf as "HH:c " groups (hex value and
   a character) for logging.
   out_size must exceed 20 (asserted).  When the text built so far grows
   beyond out_size-20 bytes, ".." is appended; presumably the loop stops
   there -- the break is not visible in this listing. */
295 static void esc_str(char *out_buf, int out_size,
296 const char *in_buf, int in_size)
302 assert(out_size > 20);
304 for (k = 0; k<in_size; k++)
/* Mask to 0..255 so bytes with the high bit set do not go negative. */
306 int c = in_buf[k] & 0xff;
/* Outside 32..126: a substitute is presumably chosen for pc on a line
   not shown here. */
308 if (c < 32 || c > 126)
312 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
313 if (strlen(out_buf) > out_size-20)
315 strcat(out_buf, "..");
/* Characters that the term_* functions below look for with strchr();
   presumably these are escaped when copied into the generated regular
   expression -- the escaping statement is not visible in this listing. */
321 #define REGEX_CHARS " []()|.*+?!"
323 /* term_100: handle term, where trunc = none(no operators at all) */
/* src         : in/out cursor into the query term.
   dst         : receives the dictionary/regex form of the term.
   space_split : non-zero to stop at white space; zero treats the input as
                 one complete subfield and keeps inner spaces.
   The last parameter (dst_term, the plain copy of the term) is declared
   on a line not visible here.  Callers treat a zero return as "no term". */
324 static int term_100(ZebraMaps zebra_maps, int reg_type,
325 const char **src, char *dst, int space_split,
333 const char *space_start = 0;
334 const char *space_end = 0;
336 if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
343 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
347 if (**map == *CHR_SPACE)
350 else /* complete subfield only. */
352 if (**map == *CHR_SPACE)
353 { /* save space mapping for later .. */
358 else if (space_start)
359 { /* reload last space */
360 while (space_start < space_end)
362 if (strchr(REGEX_CHARS, *space_start))
364 dst_term[j++] = *space_start;
365 dst[i++] = *space_start++;
368 space_start = space_end = 0;
371 /* add non-space char */
372 memcpy(dst_term+j, s1, s0 - s1);
378 if (strchr(REGEX_CHARS, *s1))
386 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
/* NOTE(review): strcpy into dst with no visible bound; relies on the
   caller's buffer being large enough -- confirm. */
388 strcpy(dst + i, map[0]);
398 /* term_101: handle term, where trunc = Process # */
/* Same parameter layout as term_100().  '#' is passed to term_pre() as a
   special character; how it is translated into dst is not visible in this
   listing.  dst_term receives the plain term and is NUL-terminated. */
399 static int term_101(ZebraMaps zebra_maps, int reg_type,
400 const char **src, char *dst, int space_split,
408 if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
417 dst_term[j++] = *s0++;
423 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
/* In split mode a mapped space ends the term. */
425 if (space_split && **map == *CHR_SPACE)
428 /* add non-space char */
429 memcpy(dst_term+j, s1, s0 - s1);
435 if (strchr(REGEX_CHARS, *s1))
443 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
445 strcpy(dst + i, map[0]);
451 dst_term[j++] = '\0';
456 /* term_103: handle term, where trunc = re-2 (regular expressions) */
/* errors : optional out-parameter (term_102() passes NULL).  When the term
            starts with '+', a digit, '+' and at least one more character,
            the digit is stored in *errors; string_term() passes the
            address of the value it later gives to dict_lookup_grep().
   The final parameter (dst_term) is declared on a line not visible here. */
457 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
458 char *dst, int *errors, int space_split,
466 if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
/* The cast to unsigned char keeps isdigit() defined for bytes >= 0x80. */
469 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
470 isdigit(((const unsigned char *)s0)[1]))
472 *errors = s0[1] - '0';
/* Regex operators are recognised here; what is done with them is on
   lines not shown (presumably copied through unescaped). */
479 if (strchr("^\\()[].*+?|-", *s0))
488 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
490 if (space_split && **map == *CHR_SPACE)
493 /* add non-space char */
494 memcpy(dst_term+j, s1, s0 - s1);
500 if (strchr(REGEX_CHARS, *s1))
508 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
510 strcpy(dst + i, map[0]);
522 /* term_102: handle term, where trunc = re-1 (regular expressions) */
/* Thin wrapper: delegates to term_103() with errors == NULL, so no error
   count is parsed from the term. */
523 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
524 char *dst, int space_split, char *dst_term)
526 return term_103(zebra_maps, reg_type, src, dst, NULL, space_split,
531 /* term_104: handle term, where trunc = Process # and ! */
/* NOTE(review): the special characters actually handed to term_pre() are
   '?', '*' and '#', which does not match the header comment above --
   confirm which is intended.
   Same parameter layout as term_100(); dst_term receives the plain term
   and is NUL-terminated. */
532 static int term_104(ZebraMaps zebra_maps, int reg_type,
533 const char **src, char *dst, int space_split,
541 if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
548 dst_term[j++] = *s0++;
/* Optional decimal count following the operator, accumulated in limit. */
549 if (*s0 >= '0' && *s0 <= '9')
552 while (*s0 >= '0' && *s0 <= '9')
554 limit = limit * 10 + (*s0 - '0');
555 dst_term[j++] = *s0++;
575 dst_term[j++] = *s0++;
580 dst_term[j++] = *s0++;
585 map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
586 if (space_split && **map == *CHR_SPACE)
590 if (strchr(REGEX_CHARS, *s1))
598 dst_term[j++] = '\0';
603 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
/* right_truncate : string_term() passes 1 for truncation value 105 and 0
                    for 106; how the flag changes the output is not visible
                    in this listing.
   Other parameters as for term_100(); dst_term is NUL-terminated. */
604 static int term_105(ZebraMaps zebra_maps, int reg_type,
605 const char **src, char *dst, int space_split,
606 char *dst_term, int right_truncate)
613 if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
622 dst_term[j++] = *s0++;
627 dst_term[j++] = *s0++;
632 map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
633 if (space_split && **map == *CHR_SPACE)
637 if (strchr(REGEX_CHARS, *s1))
651 dst_term[j++] = '\0';
/* Writes into dst a regular expression over decimal digit strings that
   matches the integers on one side of val; numeric_relation() calls it
   with val-1 / val / val+1 to express <, <=, >= and >.  dst is written
   with strcpy/strcat/sprintf and no size is passed in, so the caller must
   supply a buffer large enough for the generated expression. */
657 /* gen_regular_rel - generate regular expression from relation
658 * val: border value (inclusive)
659 * islt: 1 if <=; 0 if >=.
661 static void gen_regular_rel(char *dst, int val, int islt)
668 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
672 strcpy(dst, "(-[0-9]+|(");
680 strcpy(dst, "([0-9]+|-(");
692 sprintf(numstr, "%d", val);
693 for (w = strlen(numstr); --w >= 0; pos++)
712 strcpy(dst + dst_p, numstr);
713 dst_p = strlen(dst) - pos - 1;
741 for (i = 0; i<pos; i++)
754 /* match everything less than 10^(pos-1) */
756 for (i = 1; i<pos; i++)
757 strcat(dst, "[0-9]?");
761 /* match everything greater than 10^pos */
762 for (i = 0; i <= pos; i++)
763 strcat(dst, "[0-9]");
764 strcat(dst, "[0-9]*");
/* Copy one (possibly backslash-escaped) character from src[*indx] to
   **term_p, advancing both the output pointer and the input index.  A
   backslash is copied together with the character that follows it, so an
   escape pair is never split.  Note: this function is not declared static. */
769 void string_rel_add_char(char **term_p, const char *src, int *indx)
771 if (src[*indx] == '\\')
772 *(*term_p)++ = src[(*indx)++];
773 *(*term_p)++ = src[(*indx)++];
777 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
778 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
779 * >= abc ([b-].*|a[c-].*|ab[c-].*)
780 * ([^-a].*|a[^-b].*|ab[c-].*)
781 * < abc ([-0].*|a[-a].*|ab[-b].*)
782 * ([^a-].*|a[^b-].*|ab[^c-].*)
783 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
784 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/* Append to term_dict a regular expression implementing the relation
   attribute (type 2) of zapt for the next term in *term_sub.
   term_dict : prefix built by the caller; the expression is written at its
               current end (term_tmp).
   term_dst  : receives the plain term via term_100().
   The trailing parameter(s) are declared on a line not visible here;
   string_term() stores an error code it obtains from this call in
   zh->errCode.
   Each relation branch parses the term with term_100() into
   term_component and then emits alternatives character by character with
   string_rel_add_char(); the case labels themselves are not visible. */
786 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
787 const char **term_sub, char *term_dict,
788 oid_value attributeSet,
789 int reg_type, int space_split, char *term_dst,
795 char *term_tmp = term_dict + strlen(term_dict);
796 char term_component[2*IT_MAX_WORD+20];
798 attr_init(&relation, zapt, 2);
799 relation_value = attr_find(&relation, NULL);
802 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
803 switch (relation_value)
806 if (!term_100(zh->reg->zebra_maps, reg_type,
807 term_sub, term_component,
808 space_split, term_dst))
810 yaz_log(log_level_rpn, "Relation <");
813 for (i = 0; term_component[i]; )
820 string_rel_add_char(&term_tmp, term_component, &j);
825 string_rel_add_char(&term_tmp, term_component, &i);
/* Guard against the generated expression outgrowing the buffer; the
   action taken is on a line not shown.  NOTE(review): the test happens
   after the bytes have been written -- confirm the buffer has slack. */
832 if ((term_tmp - term_dict) > IT_MAX_WORD)
839 if (!term_100(zh->reg->zebra_maps, reg_type,
840 term_sub, term_component,
841 space_split, term_dst))
843 yaz_log(log_level_rpn, "Relation <=");
846 for (i = 0; term_component[i]; )
851 string_rel_add_char(&term_tmp, term_component, &j);
855 string_rel_add_char(&term_tmp, term_component, &i);
864 if ((term_tmp - term_dict) > IT_MAX_WORD)
/* Final alternative for <=: the term itself, copied verbatim. */
867 for (i = 0; term_component[i]; )
868 string_rel_add_char(&term_tmp, term_component, &i);
873 if (!term_100 (zh->reg->zebra_maps, reg_type,
874 term_sub, term_component, space_split, term_dst))
876 yaz_log(log_level_rpn, "Relation >");
879 for (i = 0; term_component[i];)
884 string_rel_add_char(&term_tmp, term_component, &j);
889 string_rel_add_char(&term_tmp, term_component, &i);
897 if ((term_tmp - term_dict) > IT_MAX_WORD)
900 for (i = 0; term_component[i];)
901 string_rel_add_char(&term_tmp, term_component, &i);
908 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
909 term_component, space_split, term_dst))
911 yaz_log(log_level_rpn, "Relation >=");
914 for (i = 0; term_component[i];)
921 string_rel_add_char(&term_tmp, term_component, &j);
/* The last character of the term is handled differently from the others. */
924 if (term_component[i+1])
928 string_rel_add_char(&term_tmp, term_component, &i);
932 string_rel_add_char(&term_tmp, term_component, &i);
939 if ((term_tmp - term_dict) > IT_MAX_WORD)
/* Equality: the term wrapped in parentheses. */
948 yaz_log(log_level_rpn, "Relation =");
949 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
950 term_component, space_split, term_dst))
952 strcat(term_tmp, "(");
953 strcat(term_tmp, term_component);
954 strcat(term_tmp, ")");
/* Forward declaration: string_term() is defined after term_trunc(), which
   calls it. */
963 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
964 const char **term_sub,
965 oid_value attributeSet, NMEM stream,
966 struct grep_info *grep_info,
967 int reg_type, int complete_flag,
968 int num_bases, char **basenames,
969 char *term_dst, int xpath_use);
/* Look up one term with string_term() and turn the collected ISAM
   positions into a result set.
   term_sub : in/out cursor; string_term() sets it to NULL when no term is
              left, in which case no result set is built here.
   rset     : out-parameter receiving the set from rset_trunc() (declared,
              together with rset_nmem, on lines not visible here).
   grep_info->isam_p_indx is reset to 0 first, so each call starts with an
   empty hit list. */
971 static ZEBRA_RES term_trunc(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
972 const char **term_sub,
973 oid_value attributeSet, NMEM stream,
974 struct grep_info *grep_info,
975 int reg_type, int complete_flag,
976 int num_bases, char **basenames,
978 const char *rank_type, int xpath_use,
984 grep_info->isam_p_indx = 0;
985 res = string_term(zh, zapt, term_sub, attributeSet, stream, grep_info,
986 reg_type, complete_flag, num_bases, basenames,
987 term_dst, xpath_use);
990 if (!*term_sub) /* no more terms ? */
992 yaz_log(log_level_rpn, "term: %s", term_dst);
993 *rset = rset_trunc(zh, grep_info->isam_p_buf,
994 grep_info->isam_p_indx, term_dst,
995 strlen(term_dst), rank_type, 1 /* preserve pos */,
996 zapt->term->which, rset_nmem,
997 key_it_ctrl, key_it_ctrl->scope);
/* Format the integer v in decimal and return a copy allocated from nmem.
   val_str is a local buffer declared on a line not visible here. */
1003 static char *nmem_strdup_i(NMEM nmem, int v)
1006 sprintf(val_str, "%d", v);
1007 return nmem_strdup(nmem, val_str);
/* Search the dictionary for the next term of a string (non-numeric) query.
   For every database in basenames the use attribute is resolved to one or
   more index ordinals, a regular expression "(ord1|ord2...)<reg_type><term>"
   is assembled in term_dict, and dict_lookup_grep() collects the hits in
   grep_info.
   term_sub : in/out cursor into the query term.
   term_dst : receives the plain form of the term.
   xpath_use: use value substituted when the use attribute is a string and
              xpath_use > 0.
   On failure zh->errCode / zh->errString are set. */
1010 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1011 const char **term_sub,
1012 oid_value attributeSet, NMEM stream,
1013 struct grep_info *grep_info,
1014 int reg_type, int complete_flag,
1015 int num_bases, char **basenames,
1016 char *term_dst, int xpath_use)
1018 char term_dict[2*IT_MAX_WORD+4000];
1020 AttrType truncation;
1021 int truncation_value;
1024 const char *use_string = 0;
1025 oid_value curAttributeSet = attributeSet;
1027 struct rpn_char_map_info rcmi;
/* A complete-field search keeps spaces inside the term. */
1028 int space_split = complete_flag ? 0 : 1;
1030 int bases_ok = 0; /* no of databases with OK attribute */
1031 int errCode = 0; /* err code (if any is not OK) */
1032 char *errString = 0; /* addinfo */
1034 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1035 attr_init(&use, zapt, 1);
1036 use_value = attr_find_ex(&use, &curAttributeSet, &use_string);
1037 yaz_log(log_level_rpn, "string_term, use value %d", use_value);
1038 attr_init(&truncation, zapt, 5);
1039 truncation_value = attr_find(&truncation, NULL);
1040 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1042 if (use_value == -1) /* no attribute - assume "any" */
1044 for (base_no = 0; base_no < num_bases; base_no++)
1048 int regex_range = 0;
1051 data1_local_attribute id_xpath_attr;
1052 data1_local_attribute *local_attr;
1053 int max_pos, prefix_len = 0;
1058 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1060 zh->errCode = 109; /* Database unavailable */
1061 zh->errString = basenames[base_no];
1064 if (xpath_use > 0 && use_value == -2)
1066 /* xpath mode and we have a string attribute */
1067 attp.local_attributes = &id_xpath_attr;
1068 attp.attset_ordinal = VAL_IDXPATH;
1069 id_xpath_attr.next = 0;
1071 use_value = xpath_use; /* xpath_use as use-attribute now */
1072 id_xpath_attr.local = use_value;
1074 else if (curAttributeSet == VAL_IDXPATH && use_value >= 0)
1076 /* X-Path attribute, use numeric value directly */
1077 attp.local_attributes = &id_xpath_attr;
1078 attp.attset_ordinal = VAL_IDXPATH;
1079 id_xpath_attr.next = 0;
1080 id_xpath_attr.local = use_value;
1082 else if (use_string &&
1083 (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
1086 /* we have a match for a raw string attribute */
1091 term_dict[prefix_len++] = '|';
1093 term_dict[prefix_len++] = '(';
/* Each byte of the encoded ordinal is preceded by the byte 1 in the
   pattern (presumably the dictionary grep's literal-escape marker). */
1095 ord_len = key_SU_encode (ord, ord_buf);
1096 for (i = 0; i<ord_len; i++)
1098 term_dict[prefix_len++] = 1;
1099 term_dict[prefix_len++] = ord_buf[i];
1101 attp.local_attributes = 0; /* no more attributes */
1105 /* lookup in the .att files . Allow string as well */
1106 if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1109 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1110 curAttributeSet, use_value, r);
1113 /* set was found, but value wasn't defined */
1116 errString = nmem_strdup(stream, use_string);
1118 errString = nmem_strdup_i (stream, use_value);
1123 struct oident oident;
1125 oident.proto = PROTO_Z3950;
1126 oident.oclass = CLASS_ATTSET;
1127 oident.value = curAttributeSet;
1128 oid_ent_to_oid (&oident, oid);
1131 errString = nmem_strdup(stream, oident.desc);
/* One alternative per local attribute that has an index ordinal. */
1136 for (local_attr = attp.local_attributes; local_attr;
1137 local_attr = local_attr->next)
1142 ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1143 attp.attset_ordinal,
1148 term_dict[prefix_len++] = '|';
1150 term_dict[prefix_len++] = '(';
1152 ord_len = key_SU_encode (ord, ord_buf);
1153 for (i = 0; i<ord_len; i++)
1155 term_dict[prefix_len++] = 1;
1156 term_dict[prefix_len++] = ord_buf[i];
/* Close the ordinal group and append the register type byte. */
1163 term_dict[prefix_len++] = ')';
1164 term_dict[prefix_len++] = 1;
1165 term_dict[prefix_len++] = reg_type;
1166 yaz_log(log_level_rpn, "reg_type = %d", term_dict[prefix_len-1]);
1167 term_dict[prefix_len] = '\0';
/* Dispatch on the truncation attribute (type 5). */
1169 switch (truncation_value)
1171 case -1: /* not specified */
1172 case 100: /* do not truncate */
1173 if (!string_relation (zh, zapt, &termp, term_dict,
1175 reg_type, space_split, term_dst,
1180 zh->errCode = relation_error;
1187 case 1: /* right truncation */
1188 term_dict[j++] = '(';
1189 if (!term_100(zh->reg->zebra_maps, reg_type,
1190 &termp, term_dict + j, space_split, term_dst))
1195 strcat(term_dict, ".*)");
1197 case 2: /* left truncation */
1198 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1199 if (!term_100(zh->reg->zebra_maps, reg_type,
1200 &termp, term_dict + j, space_split, term_dst))
1205 strcat(term_dict, ")");
1207 case 3: /* left&right truncation */
1208 term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
1209 if (!term_100(zh->reg->zebra_maps, reg_type,
1210 &termp, term_dict + j, space_split, term_dst))
1215 strcat(term_dict, ".*)");
1217 case 101: /* process # in term */
1218 term_dict[j++] = '(';
1219 if (!term_101(zh->reg->zebra_maps, reg_type,
1220 &termp, term_dict + j, space_split, term_dst))
1225 strcat(term_dict, ")");
1227 case 102: /* Regexp-1 */
1228 term_dict[j++] = '(';
1229 if (!term_102(zh->reg->zebra_maps, reg_type,
1230 &termp, term_dict + j, space_split, term_dst))
1235 strcat(term_dict, ")");
1237 case 103: /* Regexp-2 */
1239 term_dict[j++] = '(';
1241 if (!term_103(zh->reg->zebra_maps, reg_type,
/* NOTE(review): the third argument on the next line looks like a
   mis-encoded "&regex_range" (regex_range is declared above and passed to
   dict_lookup_grep below) -- confirm against the original file. */
1242 &termp, term_dict + j, ®ex_range,
1243 space_split, term_dst))
/* NOTE(review): the embedded line numbers 1248 and 1249 are adjacent, so
   there appears to be no break after the strcat below and case 103 falls
   through into case 104, which would write another '(' and re-parse the
   term -- confirm and add the missing break. */
1248 strcat(term_dict, ")");
1249 case 104: /* process # and ! in term */
1250 term_dict[j++] = '(';
1251 if (!term_104(zh->reg->zebra_maps, reg_type,
1252 &termp, term_dict + j, space_split, term_dst))
1257 strcat(term_dict, ")");
1259 case 105: /* process * and ! in term */
1260 term_dict[j++] = '(';
1261 if (!term_105(zh->reg->zebra_maps, reg_type,
1262 &termp, term_dict + j, space_split, term_dst, 1))
1267 strcat(term_dict, ")");
/* Same parser as 105 but with right_truncate == 0. */
1269 case 106: /* process * and ! in term */
1270 term_dict[j++] = '(';
1271 if (!term_105(zh->reg->zebra_maps, reg_type,
1272 &termp, term_dict + j, space_split, term_dst, 0))
1277 strcat(term_dict, ")");
/* Unknown truncation value: reported with the value as addinfo. */
1281 zh->errString = nmem_strdup_i(stream, truncation_value);
1287 const char *input = term_dict + prefix_len;
1288 esc_str(buf, sizeof(buf), input, strlen(input));
1292 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
1293 r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
1294 grep_info, &max_pos, init_pos,
1297 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
/* The error recorded during attribute lookup is reported here; the
   condition guarding it is on a line not shown (presumably "no database
   had a usable attribute", cf. bases_ok). */
1302 zh->errCode = errCode;
1303 zh->errString = errString;
1307 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1312 /* convert APT search term to UTF8 */
/* Copies the term of zapt into termz (a parameter declared on a line not
   visible here), converting with zh->iconv_to_utf8 when one is configured.
   At most IT_MAX_WORD-1 bytes are written plus a terminating NUL, so termz
   must hold IT_MAX_WORD bytes; longer terms are silently truncated.
   trans_scan_term() checks the result against ZEBRA_FAIL. */
1313 static ZEBRA_RES zapt_term_to_utf8(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1317 Z_Term *term = zapt->term;
1319 switch (term->which)
1321 case Z_Term_general:
1322 if (zh->iconv_to_utf8 != 0)
1324 char *inbuf = term->u.general->buf;
1325 size_t inleft = term->u.general->len;
1326 char *outbuf = termz;
1327 size_t outleft = IT_MAX_WORD-1;
1330 ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
/* Conversion failed: the all-zero call presumably resets the converter
   state before the error is reported -- see the YAZ iconv documentation. */
1332 if (ret == (size_t)(-1))
1334 ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0);
/* No converter configured: raw copy, clamped to the buffer size. */
1342 sizez = term->u.general->len;
1343 if (sizez > IT_MAX_WORD-1)
1344 sizez = IT_MAX_WORD-1;
1345 memcpy (termz, term->u.general->buf, sizez);
1346 termz[sizez] = '\0';
1349 case Z_Term_characterString:
1350 sizez = strlen(term->u.characterString);
1351 if (sizez > IT_MAX_WORD-1)
1352 sizez = IT_MAX_WORD-1;
1353 memcpy (termz, term->u.characterString, sizez);
1354 termz[sizez] = '\0';
1363 /* convert APT SCAN term to internal cmap */
/* First obtains the term via zapt_term_to_utf8() into a local buffer, then
   runs it through zebra_maps_input() piece by piece, writing the mapped
   bytes to termz.  A space mapping is remembered in space_map and emitted
   later rather than immediately (the condition is not visible here).
   Returns ZEBRA_FAIL when the conversion step fails. */
1364 static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1365 char *termz, int reg_type)
1367 char termz0[IT_MAX_WORD];
1369 if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL)
1370 return ZEBRA_FAIL; /* error */
1374 const char *cp = (const char *) termz0;
1375 const char *cp_end = cp + strlen(cp);
1378 const char *space_map = NULL;
1381 while ((len = (cp_end - cp)) > 0)
1383 map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0);
1384 if (**map == *CHR_SPACE)
1389 for (src = space_map; *src; src++)
1392 for (src = *map; *src; src++)
/* Return a copy of termz, allocated from stream, after applying the
   replace rules selected by the truncation attribute (type 5) of zapt.
   The mapping from truncation value to ex_list is on lines not visible
   here.  When zebra_replace() yields a buffer its contents are copied and
   NUL-terminated; otherwise (presumably when it yields none) termz is
   duplicated unchanged.  Note: this function is not declared static. */
1401 char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1402 const char *termz, NMEM stream, unsigned reg_id)
1405 AttrType truncation;
1406 int truncation_value;
1409 attr_init(&truncation, zapt, 5);
1410 truncation_value = attr_find(&truncation, NULL);
1412 switch (truncation_value)
1432 wrbuf = zebra_replace(zh->reg->zebra_maps, reg_id, ex_list,
1433 termz, strlen(termz));
1435 return nmem_strdup(stream, termz);
1438 char *buf = (char*) nmem_malloc(stream, wrbuf_len(wrbuf)+1);
1439 memcpy (buf, wrbuf_buf(wrbuf), wrbuf_len(wrbuf));
1440 buf[wrbuf_len(wrbuf)] = '\0';
/* Release the two heap arrays grown by add_isam_p().  The structure itself
   is caller-owned and is not freed here. */
1445 static void grep_info_delete(struct grep_info *grep_info)
1448 xfree(grep_info->term_no);
1450 xfree(grep_info->isam_p_buf);
/* Initialise grep_info to an empty state and, if zapt carries a term-set
   attribute (type 8), create the named result set for collected terms.
   The attribute may be numeric (formatted as decimal into resname) or a
   string (used as is).  Callers treat a non-zero return as failure; on
   failure zh->errString is set to the term-set name. */
1453 static int grep_info_prepare(ZebraHandle zh,
1454 Z_AttributesPlusTerm *zapt,
1455 struct grep_info *grep_info,
1460 int termset_value_numeric;
1461 const char *termset_value_string;
1464 grep_info->term_no = 0;
1466 grep_info->isam_p_size = 0;
1467 grep_info->isam_p_buf = NULL;
1469 grep_info->reg_type = reg_type;
1470 grep_info->termset = 0;
1474 attr_init(&termset, zapt, 8);
1475 termset_value_numeric =
1476 attr_find_ex(&termset, NULL, &termset_value_string);
/* -1 means the attribute is absent; -2 means it has a string value. */
1477 if (termset_value_numeric != -1)
1480 const char *termset_name = 0;
1481 if (termset_value_numeric != -2)
1484 sprintf(resname, "%d", termset_value_numeric);
1485 termset_name = resname;
1488 termset_name = termset_value_string;
1489 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1490 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1491 if (!grep_info->termset)
1494 zh->errString = nmem_strdup(stream, termset_name);
/* Split termz_org into individual terms and build one result set per term.
   result_sets     : out; array allocated from stream and grown in steps.
   num_result_sets : out; number of valid entries in *result_sets.
   The term is first normalised with normalize_term(); each term is then
   looked up through term_trunc().  If any lookup fails, every result set
   created so far is deleted before returning.  grep_info is released on
   both the failure and the normal path. */
1502 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1503 Z_AttributesPlusTerm *zapt,
1504 const char *termz_org,
1505 oid_value attributeSet,
1507 int reg_type, int complete_flag,
1508 const char *rank_type, int xpath_use,
1509 int num_bases, char **basenames,
1511 RSET **result_sets, int *num_result_sets)
1513 char term_dst[IT_MAX_WORD+1];
1514 struct grep_info grep_info;
1515 char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
1516 const char *termp = termz;
1519 *num_result_sets = 0;
1521 if (grep_info_prepare(zh, zapt, &grep_info, reg_type, stream))
/* Array full: allocate a larger one from stream and copy the old entries
   (the old array is NMEM-owned and therefore not freed individually). */
1527 if (alloc_sets == *num_result_sets)
1530 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1533 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1534 alloc_sets = alloc_sets + add;
1535 *result_sets = rnew;
1537 res = term_trunc(zh, zapt, &termp, attributeSet,
1539 reg_type, complete_flag,
1540 num_bases, basenames,
1541 term_dst, rank_type,
1542 xpath_use, rset_nmem,
1543 &(*result_sets)[*num_result_sets]);
1544 if (res != ZEBRA_OK)
1547 for (i = 0; i < *num_result_sets; i++)
1548 rset_delete((*result_sets)[i]);
1549 grep_info_delete (&grep_info);
/* A null set marks the end of the terms; it is not counted. */
1552 if ((*result_sets)[*num_result_sets] == 0)
1554 (*num_result_sets)++;
1556 grep_info_delete(&grep_info);
/* Phrase search: one result set per word via term_list_trunc(), combined
   with an ordered proximity operator (distance 1).
   *rset (declared on a line not visible here) receives a null set when no
   term produced a set, the single set when there is exactly one, and a
   proximity set otherwise. */
1560 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1561 Z_AttributesPlusTerm *zapt,
1562 const char *termz_org,
1563 oid_value attributeSet,
1565 int reg_type, int complete_flag,
1566 const char *rank_type, int xpath_use,
1567 int num_bases, char **basenames,
1571 RSET *result_sets = 0;
1572 int num_result_sets = 0;
/* The result of this call is tested as `res` below; the assignment is on
   the preceding line, which is not visible in this listing. */
1574 term_list_trunc(zh, zapt, termz_org, attributeSet,
1575 stream, reg_type, complete_flag,
1576 rank_type, xpath_use,
1577 num_bases, basenames,
1579 &result_sets, &num_result_sets);
1580 if (res != ZEBRA_OK)
1582 if (num_result_sets == 0)
1583 *rset = rsnull_create (rset_nmem, key_it_ctrl);
1584 else if (num_result_sets == 1)
1585 *rset = result_sets[0];
1587 *rset = rsprox_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope,
1588 num_result_sets, result_sets,
1589 1 /* ordered */, 0 /* exclusion */,
1590 3 /* relation */, 1 /* distance */);
/* Word-list search with OR semantics: one result set per word via
   term_list_trunc(), combined with rsmulti_or_create().
   *rset receives a null set for zero sets, the set itself for exactly one,
   and the multi-or set otherwise. */
1596 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1597 Z_AttributesPlusTerm *zapt,
1598 const char *termz_org,
1599 oid_value attributeSet,
1601 int reg_type, int complete_flag,
1602 const char *rank_type,
1604 int num_bases, char **basenames,
1608 RSET *result_sets = 0;
1609 int num_result_sets = 0;
/* Result is tested as `res` below; the assignment line is not visible. */
1611 term_list_trunc(zh, zapt, termz_org, attributeSet,
1612 stream, reg_type, complete_flag,
1613 rank_type, xpath_use,
1614 num_bases, basenames,
1616 &result_sets, &num_result_sets);
1617 if (res != ZEBRA_OK)
1619 if (num_result_sets == 0)
1620 *rset = rsnull_create (rset_nmem, key_it_ctrl);
1621 else if (num_result_sets == 1)
1622 *rset = result_sets[0];
1624 *rset = rsmulti_or_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope,
1625 num_result_sets, result_sets);
/* Word-list search with AND semantics: one result set per word via
   term_list_trunc(), combined with rsmulti_and_create().
   *rset receives a null set for zero sets, the set itself for exactly one,
   and the multi-and set otherwise. */
1631 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1632 Z_AttributesPlusTerm *zapt,
1633 const char *termz_org,
1634 oid_value attributeSet,
1636 int reg_type, int complete_flag,
1637 const char *rank_type,
1639 int num_bases, char **basenames,
1643 RSET *result_sets = 0;
1644 int num_result_sets = 0;
/* Result is tested as `res` below; the assignment line is not visible. */
1646 term_list_trunc(zh, zapt, termz_org, attributeSet,
1647 stream, reg_type, complete_flag,
1648 rank_type, xpath_use,
1649 num_bases, basenames,
1651 &result_sets, &num_result_sets);
1652 if (res != ZEBRA_OK)
1654 if (num_result_sets == 0)
1655 *rset = rsnull_create (rset_nmem, key_it_ctrl);
1656 else if (num_result_sets == 1)
1657 *rset = result_sets[0];
1659 *rset = rsmulti_and_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope,
1660 num_result_sets, result_sets);
/* Numeric counterpart of string_relation(): parse the next term as an
   integer, append to term_dict a regular expression for the relation
   attribute (type 2) and run dict_lookup_grep() with it.
   Several parameters (term_dict, max_pos, reg_type, term_dst and an error
   out-parameter, judging by the call in numeric_term()) are declared on
   lines not visible here. */
1666 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1667 const char **term_sub,
1669 oid_value attributeSet,
1670 struct grep_info *grep_info,
1680 char *term_tmp = term_dict + strlen(term_dict);
1683 attr_init(&relation, zapt, 2);
1684 relation_value = attr_find(&relation, NULL);
1686 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1688 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_tmp, 1,
/* NOTE(review): atoi() gives no error indication; a non-numeric or
   out-of-range term cannot be detected here.  strtol() with end-pointer
   and errno checks would make that detectable. */
1691 term_value = atoi (term_tmp);
1692 switch (relation_value)
/* Strict relations are expressed as inclusive ones on value -/+ 1. */
1695 yaz_log(log_level_rpn, "Relation <");
1696 gen_regular_rel(term_tmp, term_value-1, 1);
1699 yaz_log(log_level_rpn, "Relation <=");
1700 gen_regular_rel(term_tmp, term_value, 1);
1703 yaz_log(log_level_rpn, "Relation >=");
1704 gen_regular_rel(term_tmp, term_value, 0);
1707 yaz_log(log_level_rpn, "Relation >");
1708 gen_regular_rel(term_tmp, term_value+1, 0);
/* Equality also accepts any number of leading zeros. */
1712 yaz_log(log_level_rpn, "Relation =");
1713 sprintf(term_tmp, "(0*%d)", term_value);
1719 yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_tmp);
1720 r = dict_lookup_grep(zh->reg->dict, term_dict, 0, grep_info, max_pos,
/* NOTE(review): this warning says "rel = gt" although it appears to be
   shared by all relations -- the message text may be misleading. */
1723 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1724 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
/* Numeric counterpart of string_term(): for each database resolve the use
   attribute to index ordinals, build the "(ord...)<reg_type>" prefix in
   term_dict and let numeric_relation() append the relation expression and
   perform the dictionary lookup.  On failure zh->errCode / zh->errString
   are set. */
1728 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1729 const char **term_sub,
1730 oid_value attributeSet,
1731 struct grep_info *grep_info,
1732 int reg_type, int complete_flag,
1733 int num_bases, char **basenames,
1734 char *term_dst, int xpath_use, NMEM stream)
/* NOTE(review): string_term() reserves 2*IT_MAX_WORD+4000 bytes for the
   same kind of buffer; this one is much smaller although the ordinal
   prefix is built the same way -- confirm it cannot overflow. */
1736 char term_dict[2*IT_MAX_WORD+2];
1740 const char *use_string = 0;
1741 oid_value curAttributeSet = attributeSet;
1743 struct rpn_char_map_info rcmi;
1745 int bases_ok = 0; /* no of databases with OK attribute */
1746 int errCode = 0; /* err code (if any is not OK) */
1747 char *errString = 0; /* addinfo */
1749 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1750 attr_init(&use, zapt, 1);
1751 use_value = attr_find_ex(&use, &curAttributeSet, &use_string);
1753 if (use_value == -1)
1756 for (base_no = 0; base_no < num_bases; base_no++)
1759 data1_local_attribute id_xpath_attr;
1760 data1_local_attribute *local_attr;
1761 int max_pos, prefix_len = 0;
1762 int relation_error = 0;
1765 if (use_value == -2) /* string attribute (assume IDXPATH/any) */
1767 use_value = xpath_use;
1768 attp.local_attributes = &id_xpath_attr;
1769 attp.attset_ordinal = VAL_IDXPATH;
1770 id_xpath_attr.next = 0;
1771 id_xpath_attr.local = use_value;
1773 else if (curAttributeSet == VAL_IDXPATH)
1775 attp.local_attributes = &id_xpath_attr;
1776 attp.attset_ordinal = VAL_IDXPATH;
1777 id_xpath_attr.next = 0;
1778 id_xpath_attr.local = use_value;
1782 if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
1785 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
1786 curAttributeSet, use_value, r);
1791 errString = nmem_strdup(stream, use_string);
1793 errString = nmem_strdup_i (stream, use_value);
1800 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1802 zh->errCode = 109; /* Database unavailable */
1803 zh->errString = basenames[base_no];
/* One alternative per local attribute that has an index ordinal. */
1806 for (local_attr = attp.local_attributes; local_attr;
1807 local_attr = local_attr->next)
1813 ord = zebraExplain_lookup_attr_su(zh->reg->zei,
1814 attp.attset_ordinal,
1819 term_dict[prefix_len++] = '|';
1821 term_dict[prefix_len++] = '(';
1823 ord_len = key_SU_encode (ord, ord_buf);
1824 for (i = 0; i<ord_len; i++)
1826 term_dict[prefix_len++] = 1;
1827 term_dict[prefix_len++] = ord_buf[i];
1833 errString = nmem_strdup_i(stream, use_value);
/* Close the ordinal group and append the register type byte. */
1837 term_dict[prefix_len++] = ')';
1838 term_dict[prefix_len++] = 1;
1839 term_dict[prefix_len++] = reg_type;
1840 yaz_log(YLOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
1841 term_dict[prefix_len] = '\0';
1842 if (!numeric_relation(zh, zapt, &termp, term_dict,
1843 attributeSet, grep_info, &max_pos, reg_type,
1844 term_dst, &relation_error))
1848 zh->errCode = relation_error;
1858 zh->errCode = errCode;
1859 zh->errString = errString;
1863 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Numeric search: each term in termz is looked up with numeric_term() and
   turned into a result set with rset_trunc(); the sets are then combined
   with AND semantics into *rset.
   termz, stream, rset_nmem and rset are declared on lines not visible in
   this listing. */
1867 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1868 Z_AttributesPlusTerm *zapt,
1870 oid_value attributeSet,
1872 int reg_type, int complete_flag,
1873 const char *rank_type, int xpath_use,
1874 int num_bases, char **basenames,
1878 char term_dst[IT_MAX_WORD+1];
1879 const char *termp = termz;
1880 RSET *result_sets = 0;
1881 int num_result_sets = 0;
1883 struct grep_info grep_info;
1886 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1887 if (grep_info_prepare(zh, zapt, &grep_info, reg_type, stream))
/* Array full: allocate a larger one from stream and copy the entries. */
1891 if (alloc_sets == num_result_sets)
1894 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1897 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1898 alloc_sets = alloc_sets + add;
1901 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
1902 grep_info.isam_p_indx = 0;
1903 res = numeric_term(zh, zapt, &termp, attributeSet, &grep_info,
1904 reg_type, complete_flag, num_bases, basenames,
1905 term_dst, xpath_use,
/* Stop on failure or when numeric_term() reports no further term. */
1907 if (res == ZEBRA_FAIL || termp == 0)
1909 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
1910 result_sets[num_result_sets] =
1911 rset_trunc(zh, grep_info.isam_p_buf,
1912 grep_info.isam_p_indx, term_dst,
1913 strlen(term_dst), rank_type,
1914 0 /* preserve position */,
1915 zapt->term->which, rset_nmem,
1916 key_it_ctrl,key_it_ctrl->scope);
1917 if (!result_sets[num_result_sets])
1921 grep_info_delete(&grep_info);
/* Clean-up of the sets built so far; the guarding condition is not
   visible here (presumably the failure path). */
1925 for (i = 0; i<num_result_sets; i++)
1926 rset_delete(result_sets[i]);
1929 if (num_result_sets == 0)
1930 *rset = rsnull_create(rset_nmem, key_it_ctrl);
/* NOTE(review): this is a plain `if`, whereas rpn_search_APT_phrase,
   _or_list and _and_list use `else if` at this point.  With
   num_result_sets == 0 the null set assigned above would be overwritten
   by the rsmulti_and_create() call below -- confirm and change this to
   `else if`. */
1931 if (num_result_sets == 1)
1932 *rset = result_sets[0];
1934 *rset = rsmulti_and_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope,
1935 num_result_sets, result_sets);
/*
 * rpn_search_APT_local: build a result set for the "local" search type.
 *
 * A temporary result set is created with rstemp_create(), using the
 * directory named by the "setTmpDir" resource, opened for writing with
 * RSETF_WRITE, and a single 'key' is written to it with rset_write().
 * How 'key' is filled in and how the write handle is closed is on lines
 * not shown in this excerpt.
 */
1941 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1942 Z_AttributesPlusTerm *zapt,
1944 oid_value attributeSet,
1946 const char *rank_type, NMEM rset_nmem,
1952 *rset = rstemp_create(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
1953 res_get (zh->res, "setTmpDir"),0 );
1954 rsfd = rset_open(*rset, RSETF_WRITE);
1962 rset_write (rsfd, &key);
/*
 * rpn_sort_spec: record a sort criterion carried inside an RPN operand.
 *
 * Reads attribute type 7 (sort relation) and attribute type 1 (use) from
 * zapt.  If sort_sequence->specs is not yet allocated, an array of 10
 * entries is allocated from 'stream' and cleared.  The operand term must
 * be of kind Z_Term_general; its text is parsed with atoi_n() to give
 * the slot index i in sort_sequence->specs.
 *
 * A Z_SortKeySpec is then built entirely from 'stream' memory:
 *  - a generic sort element holding sort attributes whose id is the OID
 *    for attributeSet (oid_ent_to_oid) and whose list has exactly one
 *    numeric attribute of type 1 with value use_value;
 *  - sortRelation is Z_SortKeySpec_ascending for relation value 1,
 *    Z_SortKeySpec_descending for 2, and ascending again on the
 *    remaining assignment;
 *  - caseSensitivity is 0 and the missing-value action is
 *    Z_SortKeySpec_null.
 * The spec is stored in specs[i] and *rset is set to a null result set.
 *
 * NOTE(review): rsnull_create() is given NULL as its NMEM here, whereas
 * the other rsnull_create() calls in this file pass rset_nmem -- confirm
 * this is intended.
 * NOTE(review): the action taken when i >= num_specs, and the error
 * returns, are on lines not shown in this excerpt.
 */
1967 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1968 oid_value attributeSet, NMEM stream,
1969 Z_SortKeySpecList *sort_sequence,
1970 const char *rank_type,
1974 int sort_relation_value;
1975 AttrType sort_relation_type;
1980 Z_AttributeElement *ae;
1985 attr_init(&sort_relation_type, zapt, 7);
1986 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
1988 attr_init(&use_type, zapt, 1);
1989 use_value = attr_find(&use_type, &attributeSet);
1991 if (!sort_sequence->specs)
1993 sort_sequence->num_specs = 10;
1994 sort_sequence->specs = (Z_SortKeySpec **)
1995 nmem_malloc(stream, sort_sequence->num_specs *
1996 sizeof(*sort_sequence->specs));
1997 for (i = 0; i<sort_sequence->num_specs; i++)
1998 sort_sequence->specs[i] = 0;
2000 if (zapt->term->which != Z_Term_general)
2003 i = atoi_n ((char *) zapt->term->u.general->buf,
2004 zapt->term->u.general->len);
2005 if (i >= sort_sequence->num_specs)
2007 sprintf(termz, "%d", i);
2009 oe.proto = PROTO_Z3950;
2010 oe.oclass = CLASS_ATTSET;
2011 oe.value = attributeSet;
2012 if (!oid_ent_to_oid (&oe, oid))
2015 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
2016 sks->sortElement = (Z_SortElement *)
2017 nmem_malloc(stream, sizeof(*sks->sortElement));
2018 sks->sortElement->which = Z_SortElement_generic;
2019 sk = sks->sortElement->u.generic = (Z_SortKey *)
2020 nmem_malloc(stream, sizeof(*sk));
2021 sk->which = Z_SortKey_sortAttributes;
2022 sk->u.sortAttributes = (Z_SortAttributes *)
2023 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
2025 sk->u.sortAttributes->id = oid;
2026 sk->u.sortAttributes->list = (Z_AttributeList *)
2027 nmem_malloc(stream, sizeof(*sk->u.sortAttributes->list));
2028 sk->u.sortAttributes->list->num_attributes = 1;
2029 sk->u.sortAttributes->list->attributes = (Z_AttributeElement **)
2030 nmem_malloc(stream, sizeof(*sk->u.sortAttributes->list->attributes));
2031 ae = *sk->u.sortAttributes->list->attributes = (Z_AttributeElement *)
2032 nmem_malloc(stream, sizeof(**sk->u.sortAttributes->list->attributes));
2033 ae->attributeSet = 0;
2034 ae->attributeType = (int *)
2035 nmem_malloc(stream, sizeof(*ae->attributeType));
2036 *ae->attributeType = 1;
2037 ae->which = Z_AttributeValue_numeric;
2038 ae->value.numeric = (int *)
2039 nmem_malloc(stream, sizeof(*ae->value.numeric));
2040 *ae->value.numeric = use_value;
2042 sks->sortRelation = (int *)
2043 nmem_malloc(stream, sizeof(*sks->sortRelation));
2044 if (sort_relation_value == 1)
2045 *sks->sortRelation = Z_SortKeySpec_ascending;
2046 else if (sort_relation_value == 2)
2047 *sks->sortRelation = Z_SortKeySpec_descending;
2049 *sks->sortRelation = Z_SortKeySpec_ascending;
2051 sks->caseSensitivity = (int *)
2052 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2053 *sks->caseSensitivity = 0;
2055 sks->which = Z_SortKeySpec_null;
2056 sks->u.null = odr_nullval ();
2057 sort_sequence->specs[i] = sks;
2058 *rset = rsnull_create (NULL, key_it_ctrl);
/*
 * parse_xpath: extract an XPath expression from the use attribute.
 *
 * Looks up attribute type 1 of zapt with attr_find_ex(), asking for its
 * string value.  Only a string value beginning with '/' is passed to
 * zebra_parse_xpath_str(), which fills at most 'max' entries of 'xpath'
 * using 'mem'; that call's result is returned.  The value returned when
 * there is no such string is on a line not shown in this excerpt.
 */
2063 static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2064 oid_value attributeSet,
2065 struct xpath_location_step *xpath, int max, NMEM mem)
2067 oid_value curAttributeSet = attributeSet;
2069 const char *use_string = 0;
2071 attr_init(&use, zapt, 1);
2072 attr_find_ex(&use, &curAttributeSet, &use_string);
2074 if (!use_string || *use_string != '/')
2077 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/*
 * xpath_trunc: look up a regular expression 'term' in the dictionary for
 * one attribute (curAttributeSet, use) and return the matches as an RSET.
 *
 * The dictionary pattern is assembled in term_dict: the ordinal obtained
 * from zebraExplain_lookup_attr_su() is encoded with key_SU_encode() and
 * each encoded byte is written preceded by a byte with value 1, followed
 * by ')', a byte 1 and reg_type, and finally 'term' itself.  The pattern
 * is run through dict_lookup_grep() with grep_handle collecting
 * positions in grep_info, and rset_trunc() turns those positions into
 * the returned set (flags "void", term type Z_Term_characterString).
 *
 * A null result set is returned when grep_info_prepare() fails, and on
 * one further early-exit whose condition is not shown in this excerpt.
 *
 * NOTE(review): 'term' is appended with strcpy() into the fixed
 * term_dict[2048] buffer and no length check is visible here -- confirm
 * that callers bound the length of 'term'.
 */
2082 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2083 int reg_type, const char *term, int use,
2084 oid_value curAttributeSet, NMEM rset_nmem)
2087 struct grep_info grep_info;
2088 char term_dict[2048];
2091 int ord = zebraExplain_lookup_attr_su(zh->reg->zei, curAttributeSet, use);
2092 int ord_len, i, r, max_pos;
2093 int term_type = Z_Term_characterString;
2094 const char *flags = "void";
2096 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0', stream))
2097 return rsnull_create (rset_nmem,key_it_ctrl);
2100 return rsnull_create (rset_nmem,key_it_ctrl);
2102 term_dict[prefix_len++] = '|';
2104 term_dict[prefix_len++] = '(';
2106 ord_len = key_SU_encode (ord, ord_buf);
2107 for (i = 0; i<ord_len; i++)
2109 term_dict[prefix_len++] = 1;
2110 term_dict[prefix_len++] = ord_buf[i];
2112 term_dict[prefix_len++] = ')';
2113 term_dict[prefix_len++] = 1;
2114 term_dict[prefix_len++] = reg_type;
2116 strcpy(term_dict+prefix_len, term);
2118 grep_info.isam_p_indx = 0;
2119 r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
2120 &grep_info, &max_pos, 0, grep_handle);
2121 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2122 grep_info.isam_p_indx);
2123 rset = rset_trunc(zh, grep_info.isam_p_buf,
2124 grep_info.isam_p_indx, term, strlen(term),
2125 flags, 1, term_type,rset_nmem,
2126 key_it_ctrl, key_it_ctrl->scope);
2127 grep_info_delete(&grep_info);
/*
 * rpn_search_xpath: restrict 'rset' to hits that lie inside the XML
 * elements described by the parsed XPath steps xpath[0..xpath_len-1].
 *
 * Visible behaviour:
 *  - the steps are logged, curAttributeSet is switched to VAL_IDXPATH
 *    and dict_grep_cmap() is called with a null map info and handler;
 *  - for every database in basenames[] the database is selected with
 *    zebraExplain_curDatabase(); a non-zero result sets zh->errCode to
 *    109 (Database unavailable) with the database name as errString;
 *  - 'level' counts down from xpath_len.  For each level a regular
 *    expression xpath_rev is built from xpath[i].part for i = level
 *    down to 1, each part followed by '/', with ".*" appended in the
 *    branch marked "// case";
 *  - when the step at 'level' has a relation predicate with a name, the
 *    string <name without its first character>=<value>, with characters
 *    from REGEX_CHARS backslash-escaped, is looked up through
 *    xpath_trunc() with use 3 to give rset_attr;
 *  - when xpath_rev is non-empty, start-tag (use 1) and end-tag (use 2)
 *    sets are looked up through xpath_trunc() and 'rset' is replaced by
 *    rsbetween_create(start, rset, end, attr).
 *
 * NOTE(review): xpath_rev is a fixed 128-byte buffer and no bounds check
 * is visible in this excerpt -- confirm that path lengths are limited.
 * NOTE(review): several lines below are the remains of an explanatory
 * comment in the original file; they are reproduced untouched.
 */
2131 static RSET rpn_search_xpath(ZebraHandle zh,
2132 oid_value attributeSet,
2133 int num_bases, char **basenames,
2134 NMEM stream, const char *rank_type, RSET rset,
2135 int xpath_len, struct xpath_location_step *xpath,
2138 oid_value curAttributeSet = attributeSet;
2145 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2146 for (i = 0; i<xpath_len; i++)
2148 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2152 curAttributeSet = VAL_IDXPATH;
2162 a[@attr = value]/b[@other = othervalue]
2164 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2165 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2166 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2167 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2168 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2169 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2173 dict_grep_cmap (zh->reg->dict, 0, 0);
2175 for (base_no = 0; base_no < num_bases; base_no++)
2177 int level = xpath_len;
2180 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2182 zh->errCode = 109; /* Database unavailable */
2183 zh->errString = basenames[base_no];
2186 while (--level >= 0)
2188 char xpath_rev[128];
2190 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
2194 for (i = level; i >= 1; --i)
2196 const char *cp = xpath[i].part;
2202 memcpy (xpath_rev + len, "[^/]*", 5);
2205 else if (*cp == ' ')
2208 xpath_rev[len++] = 1;
2209 xpath_rev[len++] = ' ';
2213 xpath_rev[len++] = *cp;
2214 xpath_rev[len++] = '/';
2216 else if (i == 1) /* // case */
2218 xpath_rev[len++] = '.';
2219 xpath_rev[len++] = '*';
2224 if (xpath[level].predicate &&
2225 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2226 xpath[level].predicate->u.relation.name[0])
2228 WRBUF wbuf = wrbuf_alloc();
2229 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2230 if (xpath[level].predicate->u.relation.value)
2232 const char *cp = xpath[level].predicate->u.relation.value;
2233 wrbuf_putc(wbuf, '=');
2237 if (strchr(REGEX_CHARS, *cp))
2238 wrbuf_putc(wbuf, '\\');
2239 wrbuf_putc(wbuf, *cp);
2243 wrbuf_puts(wbuf, "");
2244 rset_attr = xpath_trunc(
2245 zh, stream, '0', wrbuf_buf(wbuf), 3,
2246 curAttributeSet,rset_nmem);
2247 wrbuf_free(wbuf, 1);
2254 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level, xpath_rev);
2255 if (strlen(xpath_rev))
2257 rset_start_tag = xpath_trunc(zh, stream, '0',
2258 xpath_rev, 1, curAttributeSet, rset_nmem);
2260 rset_end_tag = xpath_trunc(zh, stream, '0',
2261 xpath_rev, 2, curAttributeSet, rset_nmem);
2263 rset = rsbetween_create(rset_nmem, key_it_ctrl,
2265 rset_start_tag, rset,
2266 rset_end_tag, rset_attr);
/*
 * rpn_search_APT: evaluate one attributes-plus-term operand.
 *
 * Steps visible in this excerpt:
 *  - zebra_maps_attr() derives the register id (reg_id), search_type,
 *    rank_type, complete_flag and sort_flag from the operand's
 *    attributes; the values are logged at debug level;
 *  - the term is converted to UTF-8 into termz by zapt_term_to_utf8();
 *  - rpn_sort_spec() handles the operand when it is a sort criterion
 *    (the guarding test is on a line not shown here);
 *  - parse_xpath() parses an XPath use attribute into at most 10 steps,
 *    and the last step is checked for a leading '@';
 *  - the search is dispatched on search_type to the "phrase",
 *    "and-list", "or-list", "local" or "numeric" implementation;
 *  - after a successful search *rset is passed through
 *    rpn_search_xpath() together with the parsed steps.
 *
 * Fix: the register-id argument of zebra_maps_attr() had been corrupted
 * into the single character U+00AE followed by "_id" (an HTML "&reg"
 * entity mis-decoding); it is restored to the address of reg_id.
 *
 * NOTE(review): the return value of zebra_maps_attr() is not tested
 * here, while rpn_scan() in this file does test it -- confirm whether a
 * failure should be reported.
 */
2277 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2278 oid_value attributeSet, NMEM stream,
2279 Z_SortKeySpecList *sort_sequence,
2280 int num_bases, char **basenames,
2284 ZEBRA_RES res = ZEBRA_OK;
2286 char *search_type = NULL;
2287 char rank_type[128];
2290 char termz[IT_MAX_WORD+1];
2293 struct xpath_location_step xpath[10];
2297 log_level_rpn = yaz_log_module_level("rpn");
2300 zebra_maps_attr(zh->reg->zebra_maps, zapt, &reg_id, &search_type,
2301 rank_type, &complete_flag, &sort_flag);
2303 yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2304 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2305 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2306 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2308 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2312 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2314 xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream);
2318 if (xpath[xpath_len-1].part[0] == '@')
2322 if (!strcmp(search_type, "phrase"))
2324 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2325 reg_id, complete_flag, rank_type,
2327 num_bases, basenames, rset_nmem,
2330 else if (!strcmp(search_type, "and-list"))
2332 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2333 reg_id, complete_flag, rank_type,
2335 num_bases, basenames, rset_nmem,
2338 else if (!strcmp(search_type, "or-list"))
2340 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2341 reg_id, complete_flag, rank_type,
2343 num_bases, basenames, rset_nmem,
2346 else if (!strcmp(search_type, "local"))
2348 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2349 rank_type, rset_nmem, rset);
2351 else if (!strcmp(search_type, "numeric"))
2353 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2354 reg_id, complete_flag, rank_type,
2356 num_bases, basenames, rset_nmem, rset);
2363 if (res != ZEBRA_OK)
2367 *rset = rpn_search_xpath(zh, attributeSet, num_bases, basenames,
2368 stream, rank_type, *rset,
2369 xpath_len, xpath, rset_nmem);
/*
 * Forward declaration of rpn_search_structure(): it is called by
 * rpn_search_top() before its definition and calls itself recursively.
 * result_sets / num_result_sets are output parameters; parent_op is the
 * operator of the enclosing complex node, or null at the top.
 */
2375 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2376 oid_value attributeSet,
2377 NMEM stream, NMEM rset_nmem,
2378 Z_SortKeySpecList *sort_sequence,
2379 int num_bases, char **basenames,
2380 RSET **result_sets, int *num_result_sets,
2381 Z_Operator *parent_op);
/*
 * rpn_search_top: entry point for evaluating a whole RPN query tree.
 *
 * Delegates to rpn_search_structure().  If that does not return
 * ZEBRA_OK, every result set it handed back is released with
 * rset_delete().  On success the function asserts that exactly one
 * non-null result set was produced and stores it in *result_set.
 */
2383 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2384 oid_value attributeSet,
2385 NMEM stream, NMEM rset_nmem,
2386 Z_SortKeySpecList *sort_sequence,
2387 int num_bases, char **basenames,
2390 RSET *result_sets = 0;
2391 int num_result_sets = 0;
2392 ZEBRA_RES res = rpn_search_structure(zh, zs, attributeSet,
2395 num_bases, basenames,
2396 &result_sets, &num_result_sets,
2398 if (res != ZEBRA_OK)
2401 for (i = 0; i<num_result_sets; i++)
2402 rset_delete(result_sets[i]);
2406 assert(num_result_sets == 1);
2407 assert(result_sets);
2408 assert(*result_sets);
2409 *result_set = *result_sets;
/*
 * rpn_search_structure: recursively evaluate an RPN tree node and return
 * its result set(s) in *result_sets / *num_result_sets (the array is
 * allocated from 'stream').
 *
 * Complex node (Z_RPNStructure_complex):
 *  - both operands s1 and s2 are evaluated recursively; if either call
 *    does not return ZEBRA_OK the sets collected so far are released
 *    with rset_delete();
 *  - the left and right lists are concatenated into one new array;
 *  - if there is no parent operator, or the parent's operator differs
 *    from this node's, or this node is neither AND nor OR, the list is
 *    combined immediately: rsmulti_and_create() for AND,
 *    rsmulti_or_create() for OR, rsbool_create_not() for AND-NOT and
 *    rsprox_create() for proximity, and the node then yields that single
 *    set.  Otherwise the concatenated list is handed up unchanged for
 *    the parent to combine;
 *  - for proximity, the operator must be of kind
 *    Z_ProximityOperator_known with unit Z_ProxUnit_word; for another
 *    unit the unit number is formatted into zh->errString.
 *
 * Simple node (Z_RPNStructure_simple):
 *  - an attributes-plus-term operand is evaluated by rpn_search_APT();
 *  - a result-set reference is resolved with resultSetRef();
 *  - the node yields exactly one set.
 *
 * NOTE(review): error codes, return statements and some call arguments
 * are on lines not shown in this excerpt.
 */
2413 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2414 oid_value attributeSet,
2415 NMEM stream, NMEM rset_nmem,
2416 Z_SortKeySpecList *sort_sequence,
2417 int num_bases, char **basenames,
2418 RSET **result_sets, int *num_result_sets,
2419 Z_Operator *parent_op)
2421 *num_result_sets = 0;
2422 if (zs->which == Z_RPNStructure_complex)
2425 Z_Operator *zop = zs->u.complex->roperator;
2426 RSET *result_sets_l = 0;
2427 int num_result_sets_l = 0;
2428 RSET *result_sets_r = 0;
2429 int num_result_sets_r = 0;
2431 res = rpn_search_structure(zh, zs->u.complex->s1,
2432 attributeSet, stream, rset_nmem,
2434 num_bases, basenames,
2435 &result_sets_l, &num_result_sets_l,
2437 if (res != ZEBRA_OK)
2440 for (i = 0; i<num_result_sets_l; i++)
2441 rset_delete(result_sets_l[i]);
2444 res = rpn_search_structure(zh, zs->u.complex->s2,
2445 attributeSet, stream, rset_nmem,
2447 num_bases, basenames,
2448 &result_sets_r, &num_result_sets_r,
2450 if (res != ZEBRA_OK)
2453 for (i = 0; i<num_result_sets_l; i++)
2454 rset_delete(result_sets_l[i]);
2455 for (i = 0; i<num_result_sets_r; i++)
2456 rset_delete(result_sets_r[i]);
2460 /* make a new list of result for all children */
2461 *num_result_sets = num_result_sets_l + num_result_sets_r;
2462 *result_sets = nmem_malloc(stream, *num_result_sets *
2463 sizeof(**result_sets));
2464 memcpy(*result_sets, result_sets_l,
2465 num_result_sets_l * sizeof(**result_sets));
2466 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2467 num_result_sets_r * sizeof(**result_sets));
2469 if (!parent_op || parent_op->which != zop->which
2470 || (zop->which != Z_Operator_and &&
2471 zop->which != Z_Operator_or))
2473 /* parent node different from this one (or non-present) */
2474 /* we must combine result sets now */
2478 case Z_Operator_and:
2479 rset = rsmulti_and_create(rset_nmem, key_it_ctrl,
2481 *num_result_sets, *result_sets);
2484 rset = rsmulti_or_create(rset_nmem, key_it_ctrl,
2486 *num_result_sets, *result_sets);
2488 case Z_Operator_and_not:
2489 rset = rsbool_create_not(rset_nmem, key_it_ctrl,
2494 case Z_Operator_prox:
2495 if (zop->u.prox->which != Z_ProximityOperator_known)
2500 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2502 char *val = (char *) nmem_malloc(stream, 16);
2504 zh->errString = val;
2505 sprintf(val, "%d", *zop->u.prox->u.known);
2510 rset = rsprox_create(rset_nmem, key_it_ctrl,
2512 *num_result_sets, *result_sets,
2513 *zop->u.prox->ordered,
2514 (!zop->u.prox->exclusion ?
2515 0 : *zop->u.prox->exclusion),
2516 *zop->u.prox->relationType,
2517 *zop->u.prox->distance );
2524 *num_result_sets = 1;
2525 *result_sets = nmem_malloc(stream, *num_result_sets *
2526 sizeof(**result_sets));
2527 (*result_sets)[0] = rset;
2530 else if (zs->which == Z_RPNStructure_simple)
2535 if (zs->u.simple->which == Z_Operand_APT)
2537 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2538 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2539 attributeSet, stream, sort_sequence,
2540 num_bases, basenames, rset_nmem, &rset);
2541 if (res != ZEBRA_OK)
2544 else if (zs->u.simple->which == Z_Operand_resultSetId)
2546 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2547 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2552 nmem_strdup(stream, zs->u.simple->u.resultSetId);
2562 *num_result_sets = 1;
2563 *result_sets = nmem_malloc(stream, *num_result_sets *
2564 sizeof(**result_sets));
2565 (*result_sets)[0] = rset;
/*
 * Bookkeeping for dictionary scanning.
 *
 * struct scan_info_entry describes one scanned term; scan_handle() in
 * this file fills in its 'term' and 'isam_p' members.  The 'list'
 * member below is an array of such entries; code in this file accesses
 * it together with 'before', 'after', 'odr' and 'prefix' through a
 * struct scan_info pointer.  The remaining member declarations are on
 * lines not shown in this excerpt.
 */
2575 struct scan_info_entry {
2581 struct scan_info_entry *list;
/*
 * scan_handle: callback handed to dict_scan() by rpn_scan(); it stores
 * one dictionary entry in the scan list.
 *
 *  name   - dictionary key; it is compared with scan_info->prefix over
 *           the prefix length, and the part after the prefix is the term
 *  info   - entry payload: a length byte that must equal sizeof(ISAM_P),
 *           followed by the ISAM_P value itself
 *  pos    - position reported by the scan; for pos > 0 the slot is
 *           after - pos + before (the slot for other values is computed
 *           on a line not shown in this excerpt)
 *  client - the struct scan_info being filled
 *
 * The term text is copied into memory obtained from scan_info->odr.
 */
2587 static int scan_handle (char *name, const char *info, int pos, void *client)
2589 int len_prefix, idx;
2590 struct scan_info *scan_info = (struct scan_info *) client;
2592 len_prefix = strlen(scan_info->prefix);
2593 if (memcmp (name, scan_info->prefix, len_prefix))
2595 if (pos > 0) idx = scan_info->after - pos + scan_info->before;
2598 scan_info->list[idx].term = (char *)
2599 odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
2600 strcpy(scan_info->list[idx].term, name + len_prefix);
2601 assert (*info == sizeof(ISAM_P));
2602 memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
/*
 * scan_term_untrans: convert a dictionary term back to display form and
 * store a copy, allocated from 'stream', in *dst.
 *
 * term_untrans() first writes the untranslated term into term_src.  If
 * the handle has an iconv_from_utf8 converter, term_src is run through
 * yaz_iconv() into term_dst (leaving room for one terminating byte) and
 * the converted bytes are copied into a new buffer of len + 1 bytes;
 * otherwise *dst is a plain nmem_strdup() of term_src.
 *
 * NOTE(review): the handling of a yaz_iconv() failure and the
 * termination of the copied buffer are on lines not shown here.
 */
2606 static void scan_term_untrans (ZebraHandle zh, NMEM stream, int reg_type,
2607 char **dst, const char *src)
2609 char term_src[IT_MAX_WORD];
2610 char term_dst[IT_MAX_WORD];
2612 term_untrans (zh, reg_type, term_src, src);
2614 if (zh->iconv_from_utf8 != 0)
2617 char *inbuf = term_src;
2618 size_t inleft = strlen(term_src);
2619 char *outbuf = term_dst;
2620 size_t outleft = sizeof(term_dst)-1;
2623 ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
2625 if (ret == (size_t)(-1))
2628 len = outbuf - term_dst;
2629 *dst = nmem_malloc(stream, len + 1);
2631 memcpy (*dst, term_dst, len);
2635 *dst = nmem_strdup(stream, term_src);
/*
 * count_set: read every key of result set 'r' and report totals.
 *
 * The set is opened with RSETF_READ and read key by key.  psysno
 * remembers key.mem[0] of the previously seen key and is updated when
 * the value changes; presumably *count is incremented at that point so
 * that it ends up as the number of distinct records -- the increment
 * is on a line not shown here, TODO confirm.  The final log line
 * reports the number of keys (kno) and *count.
 */
2638 static void count_set (RSET r, int *count)
2645 yaz_log(YLOG_DEBUG, "count_set");
2648 rfd = rset_open (r, RSETF_READ);
2649 while (rset_read (rfd, &key,0 /* never mind terms */))
2651 if (key.mem[0] != psysno)
2653 psysno = key.mem[0];
2659 yaz_log(YLOG_DEBUG, "%d keys, %d records", kno, *count);
2662 ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
2663 oid_value attributeset,
2664 int num_bases, char **basenames,
2665 int *position, int *num_entries, ZebraScanEntry **list,
2666 int *is_partial, RSET limit_set, int return_zero)
2669 int pos = *position;
2670 int num = *num_entries;
2674 char termz[IT_MAX_WORD+20];
2677 const char *use_string = 0;
2678 struct scan_info *scan_info_array;
2679 ZebraScanEntry *glist;
2680 int ords[32], ord_no = 0;
2683 int bases_ok = 0; /* no of databases with OK attribute */
2684 int errCode = 0; /* err code (if any is not OK) */
2685 char *errString = 0; /* addinfo */
2688 char *search_type = NULL;
2689 char rank_type[128];
2692 NMEM rset_nmem = NULL;
2697 if (attributeset == VAL_NONE)
2698 attributeset = VAL_BIB1;
2703 int termset_value_numeric;
2704 const char *termset_value_string;
2705 attr_init(&termset, zapt, 8);
2706 termset_value_numeric =
2707 attr_find_ex(&termset, NULL, &termset_value_string);
2708 if (termset_value_numeric != -1)
2711 const char *termset_name = 0;
2713 if (termset_value_numeric != -2)
2716 sprintf(resname, "%d", termset_value_numeric);
2717 termset_name = resname;
2720 termset_name = termset_value_string;
2722 limit_set = resultSetRef (zh, termset_name);
2726 yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d",
2727 pos, num, attributeset);
2729 attr_init(&use, zapt, 1);
2730 use_value = attr_find_ex(&use, &attributeset, &use_string);
2732 if (zebra_maps_attr(zh->reg->zebra_maps, zapt, ®_id, &search_type,
2733 rank_type, &complete_flag, &sort_flag))
2739 yaz_log(YLOG_DEBUG, "use_value = %d", use_value);
2741 if (use_value == -1)
2743 for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
2745 data1_local_attribute *local_attr;
2749 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2751 zh->errString = basenames[base_no];
2752 zh->errCode = 109; /* Database unavailable */
2758 (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2761 /* we have a match for a raw string attribute */
2763 ords[ord_no++] = ord;
2764 attp.local_attributes = 0; /* no more attributes */
2770 if ((r = att_getentbyatt (zh, &attp, attributeset, use_value,
2773 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
2774 attributeset, use_value);
2779 errString = odr_strdup(stream, use_string);
2783 sprintf(val_str, "%d", use_value);
2784 errString = odr_strdup(stream, val_str);
2793 for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
2794 local_attr = local_attr->next)
2796 ord = zebraExplain_lookup_attr_su(zh->reg->zei,
2797 attp.attset_ordinal,
2800 ords[ord_no++] = ord;
2803 if (!bases_ok && errCode)
2805 zh->errCode = errCode;
2806 zh->errString = errString;
2815 /* prepare dictionary scanning */
2818 scan_info_array = (struct scan_info *)
2819 odr_malloc(stream, ord_no * sizeof(*scan_info_array));
2820 for (i = 0; i < ord_no; i++)
2822 int j, prefix_len = 0;
2823 int before_tmp = before, after_tmp = after;
2824 struct scan_info *scan_info = scan_info_array + i;
2825 struct rpn_char_map_info rcmi;
2827 rpn_char_map_prepare (zh->reg, reg_id, &rcmi);
2829 scan_info->before = before;
2830 scan_info->after = after;
2831 scan_info->odr = stream;
2833 scan_info->list = (struct scan_info_entry *)
2834 odr_malloc(stream, (before+after) * sizeof(*scan_info->list));
2835 for (j = 0; j<before+after; j++)
2836 scan_info->list[j].term = NULL;
2838 prefix_len += key_SU_encode (ords[i], termz + prefix_len);
2839 termz[prefix_len++] = reg_id;
2840 termz[prefix_len] = 0;
2841 strcpy(scan_info->prefix, termz);
2843 if (trans_scan_term(zh, zapt, termz+prefix_len, reg_id) == ZEBRA_FAIL)
2846 dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp,
2847 scan_info, scan_handle);
2849 glist = (ZebraScanEntry *)
2850 odr_malloc(stream, (before+after)*sizeof(*glist));
2852 rset_nmem = nmem_create();
2854 /* consider terms after main term */
2855 for (i = 0; i < ord_no; i++)
2859 for (i = 0; i<after; i++)
2862 const char *mterm = NULL;
2866 for (j = 0; j < ord_no; j++)
2868 if (ptr[j] < before+after &&
2869 (tst = scan_info_array[j].list[ptr[j]].term) &&
2870 (!mterm || strcmp (tst, mterm) < 0))
2878 scan_term_untrans(zh, stream->mem, reg_id,
2879 &glist[i+before].term, mterm);
2880 rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
2881 glist[i+before].term, strlen(glist[i+before].term),
2882 NULL, 0, zapt->term->which, rset_nmem,
2883 key_it_ctrl,key_it_ctrl->scope);
2885 for (j = j0+1; j<ord_no; j++)
2887 if (ptr[j] < before+after &&
2888 (tst = scan_info_array[j].list[ptr[j]].term) &&
2889 !strcmp (tst, mterm))
2895 rset_trunc(zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
2896 glist[i+before].term,
2897 strlen(glist[i+before].term), NULL, 0,
2898 zapt->term->which,rset_nmem,
2899 key_it_ctrl, key_it_ctrl->scope);
2900 rset = rsmulti_or_create(rset_nmem, key_it_ctrl,
2901 2, key_it_ctrl->scope, rsets);
2909 rsets[1] = rset_dup(limit_set);
2911 rset = rsmulti_and_create(rset_nmem, key_it_ctrl,
2912 key_it_ctrl->scope, 2, rsets);
2914 count_set(rset, &glist[i+before].occurrences);
2919 *num_entries -= (after-i);
2923 /* consider terms before main term */
2924 for (i = 0; i<ord_no; i++)
2927 for (i = 0; i<before; i++)
2930 const char *mterm = NULL;
2934 for (j = 0; j <ord_no; j++)
2936 if (ptr[j] < before &&
2937 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2938 (!mterm || strcmp (tst, mterm) > 0))
2947 scan_term_untrans (zh, stream->mem, reg_id,
2948 &glist[before-1-i].term, mterm);
2951 (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
2952 glist[before-1-i].term, strlen(glist[before-1-i].term),
2953 NULL, 0, zapt->term->which,rset_nmem,
2954 key_it_ctrl,key_it_ctrl->scope);
2958 for (j = j0+1; j<ord_no; j++)
2960 if (ptr[j] < before &&
2961 (tst = scan_info_array[j].list[before-1-ptr[j]].term) &&
2962 !strcmp (tst, mterm))
2967 rsets[1] = rset_trunc(
2969 &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
2970 glist[before-1-i].term,
2971 strlen(glist[before-1-i].term), NULL, 0,
2972 zapt->term->which, rset_nmem,
2973 key_it_ctrl, key_it_ctrl->scope);
2974 rset = rsmulti_or_create(rset_nmem, key_it_ctrl,
2975 2, key_it_ctrl->scope, rsets);
2984 rsets[1] = rset_dup(limit_set);
2986 rset = rsmulti_and_create(rset_nmem, key_it_ctrl,
2987 key_it_ctrl->scope, 2, rsets);
2989 count_set (rset, &glist[before-1-i].occurrences);
3000 nmem_destroy(rset_nmem);
3001 *list = glist + i; /* list is set to first 'real' entry */
3003 yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d",
3004 *position, *num_entries);