1 /* $Id: rpnsearch.c,v 1.13 2007-05-14 12:33:33 adam Exp $
2 Copyright (C) 1995-2007
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
/* NOTE(review): presumably a one-shot guard for initialising log_level_rpn;
   the code that sets it is not next to the assignment in add_isam_p - confirm. */
40 static int log_level_set = 0;
/* YAZ log level for this module; add_isam_p assigns it from
   yaz_log_module_level("rpn"). */
41 static int log_level_rpn = 0;
/* NOTE(review): presumably switches off the termset code paths - confirm
   against the preprocessor conditionals that test it. */
43 #define TERMSET_DISABLE 1
/*
 * Character-map callback installed on the dictionary by
 * rpn_char_map_prepare().
 *
 * vp:   the struct rpn_char_map_info registered together with the callback.
 * from: in/out pointer to the input text; len is the number of bytes
 *       offered to zebra_maps_input().
 */
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
47 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
/* map the input with the maps and register type stored in the context */
48 const char **out = zebra_maps_input(p->zm, p->reg_type, from, len, 0);
/* diagnostic output: a "---" marker, then *outp printed as two hex digits */
52 const char *outp = *out;
53 yaz_log(YLOG_LOG, "---");
56 yaz_log(YLOG_LOG, "%02X", *outp);
/*
 * Fill map_info with the register's zebra_maps and the given reg_type, then
 * install rpn_char_map_handler on the register's dictionary through
 * dict_grep_cmap(), with map_info as the handler's context pointer.
 */
64 void rpn_char_map_prepare(struct zebra_register *reg, int reg_type,
65 struct rpn_char_map_info *map_info)
67 map_info->zm = reg->zebra_maps;
68 map_info->reg_type = reg_type;
/* NOTE(review): map_info is handed over by pointer, so it presumably has to
   stay valid for as long as the dictionary may invoke the handler - confirm. */
69 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
/*
 * Append one dictionary hit to the grep_info accumulator (called p in the
 * body; grep_handle passes its void pointer cast to struct grep_info *).
 *
 * name: dictionary key. key_SU_decode() reads an ordinal from its start;
 *       the term text follows at name+len.
 * info: the first byte must equal sizeof(ISAM_P) (asserted below); the
 *       ISAM_P value itself is copied from info+1.
 */
87 static int add_isam_p(const char *name, const char *info,
92 log_level_rpn = yaz_log_module_level("rpn");
95 /* we may have to stop this madness.. NOTE: -1 so that if
96 truncmax == trunxlimit we do *not* generate result sets */
97 if (p->isam_p_indx >= p->trunc_max - 1)
/* buffer full: grow to 2*size + 100 entries and copy the used part over */
100 if (p->isam_p_indx == p->isam_p_size)
102 ISAM_P *new_isam_p_buf;
106 p->isam_p_size = 2*p->isam_p_size + 100;
107 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
111 memcpy(new_isam_p_buf, p->isam_p_buf,
112 p->isam_p_indx * sizeof(*p->isam_p_buf));
113 xfree(p->isam_p_buf);
115 p->isam_p_buf = new_isam_p_buf;
/* the parallel term_no array is resized to the same number of entries */
118 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* NOTE(review): the source of this copy is p->isam_p_buf although the
   destination replaces p->term_no and the length uses sizeof(*p->term_no);
   this looks like it should copy from p->term_no - confirm and fix. */
121 memcpy(new_term_no, p->isam_p_buf,
122 p->isam_p_indx * sizeof(*p->term_no));
125 p->term_no = new_term_no;
/* store the ISAM pointer carried in info at the current index */
128 assert(*info == sizeof(*p->isam_p_buf));
129 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
/* decode the key for logging and for registering the term in the termset */
134 char term_tmp[IT_MAX_WORD];
136 const char *index_name;
137 int len = key_SU_decode (&ord, (const unsigned char *) name);
/* convert the stored term text after the ordinal into term_tmp */
139 zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len);
140 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
/* resolve the ordinal to its database and index name */
141 zebraExplain_lookup_ord(p->zh->reg->zei,
142 ord, 0 /* index_type */, &db, &index_name);
143 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
145 resultSetAddTerm(p->zh, p->termset, name[len], db,
146 index_name, term_tmp);
/*
 * Callback adapter with a void * context: forwards to add_isam_p() with p
 * cast to struct grep_info *. It is passed to dict_lookup_grep() by
 * numeric_relation().
 */
152 static int grep_handle(char *name, const char *info, void *p)
154 return add_isam_p(name, info, (struct grep_info *) p);
/*
 * Pre-scan of a term starting at *src, used by the term_1xx builders before
 * they consume a term (they treat a zero result as "no term").
 *
 * ct1, ct2: optional (may be NULL) sets of characters; the current character
 *           *s0 is tested against each with strchr().
 * first:    forwarded to zebra_maps_input().
 */
157 static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
158 const char *ct1, const char *ct2, int first)
160 const char *s1, *s0 = *src;
163 /* skip white space */
166 if (ct1 && strchr(ct1, *s0))
168 if (ct2 && strchr(ct2, *s0))
/* map the text at s1 and test whether the result is something other than
   the space mapping */
171 map = zebra_maps_input(zebra_maps, reg_type, &s1, strlen(s1), first);
172 if (**map != *CHR_SPACE)
/*
 * Render in_size bytes of in_buf into out_buf as a sequence of "HH:c "
 * entries (hex value, then a character) for logging.
 *
 * out_size must be larger than 20 (asserted). Once the text in out_buf
 * exceeds out_size-20 characters, ".." is appended.
 */
181 static void esc_str(char *out_buf, size_t out_size,
182 const char *in_buf, int in_size)
188 assert(out_size > 20);
190 for (k = 0; k<in_size; k++)
/* mask to 0..255 so the value is not affected by char signedness */
192 int c = in_buf[k] & 0xff;
/* outside the printable ASCII range; NOTE(review): presumably selects a
   placeholder for pc - confirm */
194 if (c < 32 || c > 126)
/* NOTE(review): sprintf/strcat are unbounded here; only the 20-byte margin
   tested below keeps the output inside out_buf */
198 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
199 if (strlen(out_buf) > out_size-20)
201 strcat(out_buf, "..");
/* Characters that the term_1xx builders prefix with a backslash when they
   copy source characters into the dictionary regular expression. */
207 #define REGEX_CHARS " []()|.*+?!"
209 /* term_100: handle term, where trunc = none(no operators at all) */
/*
 * Reads a term from *src. Regular-expression text for it is appended to
 * term_dict and the raw source bytes are collected in dst_term.
 *
 * space_split: passed negated to term_pre() as its `first` argument; the
 *              comment on the else branch below marks the other mode as
 *              "complete subfield only".
 * Callers treat a zero return as "no term available".
 */
210 static int term_100(ZebraMaps zebra_maps, int reg_type,
211 const char **src, WRBUF term_dict, int space_split,
/* bounds of a run of source text that mapped to space and has not been
   emitted yet */
219 const char *space_start = 0;
220 const char *space_end = 0;
222 if (!term_pre(zebra_maps, reg_type, src, NULL, NULL, !space_split))
229 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
233 if (**map == *CHR_SPACE)
236 else /* complete subfield only. */
238 if (**map == *CHR_SPACE)
239 { /* save space mapping for later .. */
244 else if (space_start)
245 { /* reload last space */
/* emit the remembered space run: escaped into term_dict, raw into dst_term */
246 while (space_start < space_end)
248 if (strchr(REGEX_CHARS, *space_start))
249 wrbuf_putc(term_dict, '\\');
250 dst_term[j++] = *space_start;
251 wrbuf_putc(term_dict, *space_start);
256 space_start = space_end = 0;
259 /* add non-space char */
/* s1..s0 is the source span consumed by the last zebra_maps_search() call */
261 memcpy(dst_term+j, s1, s0 - s1);
/* source character copied to the regexp, escaped if it is a regex operator */
267 if (strchr(REGEX_CHARS, *s1))
268 wrbuf_putc(term_dict, '\\');
269 wrbuf_putc(term_dict, *s1);
/* mapped string: rendered with esc_str into tmpbuf, then appended as-is */
276 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
278 wrbuf_puts(term_dict, map[0]);
286 /* term_101: handle term, where trunc = Process # */
/*
 * Like term_100, but term_pre() is told that '#' is a special character.
 * Regular-expression text goes to term_dict; the raw source bytes go to
 * dst_term, which is NUL-terminated at the end.
 */
287 static int term_101(ZebraMaps zebra_maps, int reg_type,
288 const char **src, WRBUF term_dict, int space_split,
296 if (!term_pre(zebra_maps, reg_type, src, "#", "#", !space_split))
/* wildcard: the regexp receives ".*" while dst_term keeps the source
   character itself */
304 wrbuf_puts(term_dict, ".*");
305 dst_term[j++] = *s0++;
311 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
/* a space mapping is only significant when splitting on spaces */
313 if (space_split && **map == *CHR_SPACE)
317 /* add non-space char */
318 memcpy(dst_term+j, s1, s0 - s1);
/* source character copied to the regexp, escaped if it is a regex operator */
324 if (strchr(REGEX_CHARS, *s1))
325 wrbuf_putc(term_dict, '\\');
326 wrbuf_putc(term_dict, *s1);
/* mapped string: rendered with esc_str into tmpbuf, then appended as-is */
333 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
335 wrbuf_puts(term_dict, map[0]);
339 dst_term[j++] = '\0';
344 /* term_103: handle term, where trunc = re-2 (regular expressions) */
/*
 * Reads a term that may contain regular-expression operators and appends
 * the resulting expression to term_dict.
 *
 * errors: optional output (term_102 passes NULL). When non-NULL and the
 *         term starts with '+', a digit and another '+', the digit's value
 *         is stored in *errors.
 */
345 static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
346 WRBUF term_dict, int *errors, int space_split,
354 if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
/* "+N+" prefix; the cast keeps isdigit() away from negative char values */
357 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
358 isdigit(((const unsigned char *)s0)[1]))
360 *errors = s0[1] - '0';
/* regex operators from the query are copied to the regexp without escaping */
367 if (strchr("^\\()[].*+?|-", *s0))
370 wrbuf_putc(term_dict, *s0);
378 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
380 if (space_split && **map == *CHR_SPACE)
383 /* add non-space char */
384 memcpy(dst_term+j, s1, s0 - s1);
/* source character copied to the regexp, escaped if it is a regex operator */
391 if (strchr(REGEX_CHARS, *s1))
392 wrbuf_putc(term_dict, '\\');
393 wrbuf_putc(term_dict, *s1);
/* mapped string: rendered with esc_str into tmpbuf, then appended as-is */
400 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
402 wrbuf_puts(term_dict, map[0]);
412 /* term_102: handle term, where trunc = re-1 (regular expressions) */
/* Thin wrapper: same as term_103 but with no `errors` output (NULL). */
413 static int term_102(ZebraMaps zebra_maps, int reg_type, const char **src,
414 WRBUF term_dict, int space_split, char *dst_term)
416 return term_103(zebra_maps, reg_type, src, term_dict, NULL, space_split,
421 /* term_104: handle term, process # and ! */
/*
 * Reads a term in which the characters '?', '*' and '#' are special (that is
 * the set handed to term_pre below). Regular-expression text is appended to
 * term_dict; the raw source bytes, including the special characters and any
 * digits following them, are copied to dst_term, which is NUL-terminated.
 * NOTE(review): the header line above mentions '!' but the set passed to
 * term_pre does not contain it - confirm which is intended.
 */
422 static int term_104(ZebraMaps zebra_maps, int reg_type,
423 const char **src, WRBUF term_dict, int space_split,
431 if (!term_pre(zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
/* a special character optionally followed by a decimal count ("limit") */
439 dst_term[j++] = *s0++;
440 if (*s0 >= '0' && *s0 <= '9')
443 while (*s0 >= '0' && *s0 <= '9')
445 limit = limit * 10 + (*s0 - '0');
446 dst_term[j++] = *s0++;
/* NOTE(review): presumably ".?" is emitted once per counted position and
   ".*" when no count was given - confirm */
452 wrbuf_puts(term_dict, ".?");
457 wrbuf_puts(term_dict, ".*");
/* any number of characters */
463 wrbuf_puts(term_dict, ".*");
464 dst_term[j++] = *s0++;
/* exactly one character */
469 wrbuf_puts(term_dict, ".");
470 dst_term[j++] = *s0++;
476 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
478 if (space_split && **map == *CHR_SPACE)
482 /* add non-space char */
483 memcpy(dst_term+j, s1, s0 - s1);
/* source character copied to the regexp, escaped if it is a regex operator */
489 if (strchr(REGEX_CHARS, *s1))
490 wrbuf_putc(term_dict, '\\');
491 wrbuf_putc(term_dict, *s1);
/* mapped string: rendered with esc_str into tmpbuf, then appended as-is */
498 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
500 wrbuf_puts(term_dict, map[0]);
504 dst_term[j++] = '\0';
509 /* term_105/106: handle term, where trunc = Process * and ! and right trunc */
/*
 * Reads a term in which '*' and '!' are special (the set handed to term_pre).
 * Regular-expression text is appended to term_dict and the raw source bytes
 * to dst_term, which is NUL-terminated.
 *
 * right_truncate: string_term passes 1 for truncation 105 and 0 for 106.
 *                 NOTE(review): presumably controls the trailing ".*"
 *                 appended near the end - confirm.
 */
510 static int term_105(ZebraMaps zebra_maps, int reg_type,
511 const char **src, WRBUF term_dict, int space_split,
512 char *dst_term, int right_truncate)
519 if (!term_pre(zebra_maps, reg_type, src, "*!", "*!", !space_split))
/* any number of characters */
527 wrbuf_puts(term_dict, ".*");
528 dst_term[j++] = *s0++;
/* exactly one character */
533 wrbuf_putc(term_dict, '.');
534 dst_term[j++] = *s0++;
540 map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0),
542 if (space_split && **map == *CHR_SPACE)
546 /* add non-space char */
547 memcpy(dst_term+j, s1, s0 - s1);
/* source character copied to the regexp, escaped if it is a regex operator */
553 if (strchr(REGEX_CHARS, *s1))
554 wrbuf_putc(term_dict, '\\');
555 wrbuf_putc(term_dict, *s1);
/* mapped string: rendered with esc_str into tmpbuf, then appended as-is */
562 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
564 wrbuf_puts(term_dict, map[0]);
/* trailing wildcard after the whole term */
569 wrbuf_puts(term_dict, ".*");
570 dst_term[j++] = '\0';
576 /* gen_regular_rel - generate regular expression from relation
577 * val: border value (inclusive)
578 * islt: 1 to match values <= val; 0 to match values >= val. */
/*
 * Appends to term_dict a regular expression built from the integer val and
 * the direction flag islt. The expression is assembled in the local buffer
 * dst_buf with strcpy/strcat and written out with one wrbuf_puts() at the end.
 */
580 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
/* NOTE(review): fixed 2000-byte buffer filled with unchecked strcpy/strcat;
   nothing visible verifies that the expansion fits */
582 char dst_buf[20*5*20]; /* assuming enough for expansion */
589 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
/* opening alternative: any negative number, or ... */
593 strcpy(dst, "(-[0-9]+|(");
/* opening alternative: any unsigned number, or '-' followed by ... */
601 strcpy(dst, "([0-9]+|-(");
/* work on the decimal digits of val, last digit first; pos counts them */
612 sprintf(numstr, "%d", val);
613 for (w = strlen(numstr); --w >= 0; pos++)
632 strcpy(dst + dst_p, numstr);
633 dst_p = strlen(dst) - pos - 1;
661 for (i = 0; i<pos; i++)
674 /* match everything less than 10^(pos-1) */
676 for (i = 1; i<pos; i++)
677 strcat(dst, "[0-9]?");
681 /* match everything greater than 10^pos */
682 for (i = 0; i <= pos; i++)
683 strcat(dst, "[0-9]");
684 strcat(dst, "[0-9]*");
687 wrbuf_puts(term_dict, dst);
/*
 * Copies the item at position *indx of wsrc to term_p. When that character
 * is a backslash it is written and a further write of src[*indx] follows,
 * so an escape pair is kept together.
 * NOTE(review): callers loop with no increment of their own
 * (for (i = 0; i < len; ) string_rel_add_char(..., &i);), so *indx must be
 * advanced here; the increments are not adjacent to the writes - confirm.
 */
690 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
692 const char *src = wrbuf_cstr(wsrc);
693 if (src[*indx] == '\\')
695 wrbuf_putc(term_p, src[*indx]);
698 wrbuf_putc(term_p, src[*indx]);
703 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
704 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
705 * >= abc ([b-].*|a[c-].*|ab[c-].*)
706 * ([^-a].*|a[^-b].*|ab[c-].*)
707 * < abc ([-0].*|a[-a].*|ab[-b].*)
708 * ([^a-].*|a[^b-].*|ab[^c-].*)
709 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
710 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
/*
 * Translates the relation attribute (attribute type 2) of zapt plus the
 * term at *term_sub into a regular expression appended to term_dict.
 *
 * The term is first read with term_100() into the scratch buffer
 * term_component; each ordering relation then emits one alternative per
 * prefix of the term, of the form prefix + character class + ".*".
 * term_component is destroyed on every exit path shown here. For an
 * unsupported relation *error_code receives
 * YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE.
 */
712 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
713 const char **term_sub, WRBUF term_dict,
714 const Odr_oid *attributeSet,
715 int reg_type, int space_split, char *term_dst,
721 WRBUF term_component = wrbuf_alloc();
723 attr_init_APT(&relation, zapt, 2);
724 relation_value = attr_find(&relation, NULL);
727 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
728 switch (relation_value)
/* ---- relation "<" ---- */
731 if (!term_100(zh->reg->zebra_maps, reg_type,
732 term_sub, term_component,
733 space_split, term_dst))
735 wrbuf_destroy(term_component);
738 yaz_log(log_level_rpn, "Relation <");
740 wrbuf_putc(term_dict, '(');
/* i is advanced by string_rel_add_char(); the loop header has no increment */
741 for (i = 0; i < wrbuf_len(term_component); )
746 wrbuf_putc(term_dict, '|');
/* copy the prefix (index j), then a negated class for the next position */
748 string_rel_add_char(term_dict, term_component, &j);
750 wrbuf_putc(term_dict, '[');
752 wrbuf_putc(term_dict, '^');
/* byte 1 followed by FIRST_IN_FIELD_CHAR; string_term documents byte 1 as
   the internal regexp escape character */
754 wrbuf_putc(term_dict, 1);
755 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
757 string_rel_add_char(term_dict, term_component, &i);
758 wrbuf_putc(term_dict, '-');
760 wrbuf_putc(term_dict, ']');
761 wrbuf_putc(term_dict, '.');
762 wrbuf_putc(term_dict, '*');
764 wrbuf_putc(term_dict, ')');
/* ---- relation "<=" ---- */
767 if (!term_100(zh->reg->zebra_maps, reg_type,
768 term_sub, term_component,
769 space_split, term_dst))
771 wrbuf_destroy(term_component);
774 yaz_log(log_level_rpn, "Relation <=");
776 wrbuf_putc(term_dict, '(');
777 for (i = 0; i < wrbuf_len(term_component); )
782 string_rel_add_char(term_dict, term_component, &j);
783 wrbuf_putc(term_dict, '[');
785 wrbuf_putc(term_dict, '^');
787 wrbuf_putc(term_dict, 1);
788 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
790 string_rel_add_char(term_dict, term_component, &i);
791 wrbuf_putc(term_dict, '-');
793 wrbuf_putc(term_dict, ']');
794 wrbuf_putc(term_dict, '.');
795 wrbuf_putc(term_dict, '*');
797 wrbuf_putc(term_dict, '|');
/* final alternative: the term itself, which makes the relation inclusive */
799 for (i = 0; i < wrbuf_len(term_component); )
800 string_rel_add_char(term_dict, term_component, &i);
801 wrbuf_putc(term_dict, ')');
/* ---- relation ">" ---- */
804 if (!term_100 (zh->reg->zebra_maps, reg_type,
805 term_sub, term_component, space_split, term_dst))
807 wrbuf_destroy(term_component);
810 yaz_log(log_level_rpn, "Relation >");
812 wrbuf_putc(term_dict, '(');
813 for (i = 0; i < wrbuf_len(term_component); )
818 string_rel_add_char(term_dict, term_component, &j);
819 wrbuf_putc(term_dict, '[');
821 wrbuf_putc(term_dict, '^');
822 wrbuf_putc(term_dict, '-');
823 string_rel_add_char(term_dict, term_component, &i);
825 wrbuf_putc(term_dict, ']');
826 wrbuf_putc(term_dict, '.');
827 wrbuf_putc(term_dict, '*');
829 wrbuf_putc(term_dict, '|');
/* final alternative: the term followed by at least one more character */
831 for (i = 0; i < wrbuf_len(term_component); )
832 string_rel_add_char(term_dict, term_component, &i);
833 wrbuf_putc(term_dict, '.');
834 wrbuf_putc(term_dict, '+');
835 wrbuf_putc(term_dict, ')');
/* ---- relation ">=" ---- */
838 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
839 term_component, space_split, term_dst))
841 wrbuf_destroy(term_component);
844 yaz_log(log_level_rpn, "Relation >=");
846 wrbuf_putc(term_dict, '(');
847 for (i = 0; i < wrbuf_len(term_component); )
852 wrbuf_putc(term_dict, '|');
854 string_rel_add_char(term_dict, term_component, &j);
855 wrbuf_putc(term_dict, '[');
/* all positions but the last use a negated class; the last position uses an
   open-ended range starting at the character, which includes equality */
857 if (i < wrbuf_len(term_component)-1)
859 wrbuf_putc(term_dict, '^');
860 wrbuf_putc(term_dict, '-');
861 string_rel_add_char(term_dict, term_component, &i);
865 string_rel_add_char(term_dict, term_component, &i);
866 wrbuf_putc(term_dict, '-');
868 wrbuf_putc(term_dict, ']');
869 wrbuf_putc(term_dict, '.');
870 wrbuf_putc(term_dict, '*');
872 wrbuf_putc(term_dict, ')');
/* ---- relation "=": the term itself, wrapped in parentheses ---- */
879 yaz_log(log_level_rpn, "Relation =");
880 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub,
881 term_component, space_split, term_dst))
883 wrbuf_destroy(term_component);
886 wrbuf_puts(term_dict, "(");
887 wrbuf_puts(term_dict, wrbuf_cstr(term_component));
888 wrbuf_puts(term_dict, ")");
/* ---- relation that matches every term ---- */
891 yaz_log(log_level_rpn, "Relation always matches");
892 /* skip to end of term (we don't care what it is) */
893 while (**term_sub != '\0')
/* ---- unsupported relation: report through *error_code ---- */
897 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
898 wrbuf_destroy(term_component);
901 wrbuf_destroy(term_component);
/* Forward declaration: string_term() is defined after term_trunc(), which
   calls it. */
905 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
906 const char **term_sub,
908 const Odr_oid *attributeSet, NMEM stream,
909 struct grep_info *grep_info,
910 int reg_type, int complete_flag,
911 int num_bases, char **basenames,
913 const char *xpath_use,
914 struct ord_list **ol);
/*
 * Extracts the per-term hit limit (attribute type 11) and the term reference
 * id (attribute type 10) from zapt.
 *
 * hits_limit_value: receives the effective limit:
 *     attribute absent (-1), term ref present -> zh->approx_limit
 *     attribute absent (-1), no term ref      -> 0 (no counting)
 *     attribute value 0                       -> zh->approx_limit
 *     anything else                           -> the attribute value
 * term_ref_id_str:  receives the reference id as a string; a numeric id
 *     (>= 0) is formatted into a 20-byte buffer taken from nmem.
 */
916 static ZEBRA_RES term_limits_APT(ZebraHandle zh,
917 Z_AttributesPlusTerm *zapt,
918 zint *hits_limit_value,
919 const char **term_ref_id_str,
922 AttrType term_ref_id_attr;
923 AttrType hits_limit_attr;
926 attr_init_APT(&hits_limit_attr, zapt, 11);
927 *hits_limit_value = attr_find(&hits_limit_attr, NULL);
929 attr_init_APT(&term_ref_id_attr, zapt, 10);
930 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
/* numeric reference id: convert it to its decimal string form */
931 if (term_ref_id_int >= 0)
933 char *res = nmem_malloc(nmem, 20);
934 sprintf(res, "%d", term_ref_id_int);
935 *term_ref_id_str = res;
938 /* no limit given ? */
939 if (*hits_limit_value == -1)
941 if (*term_ref_id_str)
943 /* use global if term_ref is present */
944 *hits_limit_value = zh->approx_limit;
948 /* no counting if term_ref is not present */
949 *hits_limit_value = 0;
952 else if (*hits_limit_value == 0)
954 /* 0 is the same as global limit */
955 *hits_limit_value = zh->approx_limit;
957 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
958 *term_ref_id_str ? *term_ref_id_str : "none",
/*
 * Builds the result set for one term of the query.
 *
 * Steps: read the hit limit / term reference id from zapt, reset the ISAM
 * position counter in grep_info, let string_term() collect the matching
 * dictionary entries, then hand the collected ISAM pointers to rset_trunc().
 * The temporary regexp buffer term_dict is destroyed right after
 * string_term() returns.
 */
963 static ZEBRA_RES term_trunc(ZebraHandle zh,
964 Z_AttributesPlusTerm *zapt,
965 const char **term_sub,
966 const Odr_oid *attributeSet, NMEM stream,
967 struct grep_info *grep_info,
968 int reg_type, int complete_flag,
969 int num_bases, char **basenames,
971 const char *rank_type,
972 const char *xpath_use,
975 struct rset_key_control *kc)
979 zint hits_limit_value;
980 const char *term_ref_id_str = 0;
981 WRBUF term_dict = wrbuf_alloc();
984 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
/* start collecting ISAM pointers from scratch for this term */
985 grep_info->isam_p_indx = 0;
986 res = string_term(zh, zapt, term_sub, term_dict,
987 attributeSet, stream, grep_info,
988 reg_type, complete_flag, num_bases, basenames,
989 term_dst, xpath_use, &ol);
990 wrbuf_destroy(term_dict);
993 if (!*term_sub) /* no more terms ? */
995 yaz_log(log_level_rpn, "term: %s", term_dst);
/* merge the collected ISAM pointers into one result set for the term */
996 *rset = rset_trunc(zh, grep_info->isam_p_buf,
997 grep_info->isam_p_indx, term_dst,
998 strlen(term_dst), rank_type, 1 /* preserve pos */,
999 zapt->term->which, rset_nmem,
1000 kc, kc->scope, ol, reg_type, hits_limit_value,
/*
 * Looks up one term in the dictionary for every database in basenames and
 * collects the hits in grep_info.
 *
 * For each database a regular expression is built in term_dict: first the
 * encoded index ordinal (each byte preceded by the escape byte 1) in
 * parentheses, then a term expression chosen by the truncation attribute
 * (attribute type 5). The expression is run with dict_lookup_grep().
 *
 * complete_flag: when set, terms are not split on spaces (space_split = 0).
 * ol:            receives the list of ordinals that were searched.
 */
1007 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1008 const char **term_sub,
1010 const Odr_oid *attributeSet, NMEM stream,
1011 struct grep_info *grep_info,
1012 int reg_type, int complete_flag,
1013 int num_bases, char **basenames,
1015 const char *xpath_use,
1016 struct ord_list **ol)
1019 AttrType truncation;
1020 int truncation_value;
1022 struct rpn_char_map_info rcmi;
1023 int space_split = complete_flag ? 0 : 1;
1025 int bases_ok = 0; /* no of databases with OK attribute */
1027 *ol = ord_list_create(stream);
1029 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1030 attr_init_APT(&truncation, zapt, 5);
1031 truncation_value = attr_find(&truncation, NULL);
1032 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1034 for (base_no = 0; base_no < num_bases; base_no++)
1037 int regex_range = 0;
1038 int max_pos, prefix_len = 0;
1043 termp = *term_sub; /* start of term for each database */
/* a non-zero result from zebraExplain_curDatabase means the database could
   not be selected */
1046 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1048 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1049 basenames[base_no]);
/* resolve the attributes to an index ordinal for this database */
1053 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1054 attributeSet, &ord) != ZEBRA_OK)
1058 wrbuf_rewind(term_dict); /* new dictionary regexp term */
1062 *ol = ord_list_append(stream, *ol, ord);
1063 ord_len = key_SU_encode (ord, ord_buf);
1065 wrbuf_putc(term_dict, '(');
1067 for (i = 0; i<ord_len; i++)
1069 wrbuf_putc(term_dict, 1); /* our internal regexp escape char */
1070 wrbuf_putc(term_dict, ord_buf[i]);
1072 wrbuf_putc(term_dict, ')');
/* remember where the term part starts so logging can skip the ordinal */
1074 prefix_len = wrbuf_len(term_dict);
1076 switch (truncation_value)
1078 case -1: /* not specified */
1079 case 100: /* do not truncate */
1080 if (!string_relation(zh, zapt, &termp, term_dict,
1082 reg_type, space_split, term_dst,
1087 zebra_setError(zh, relation_error, 0);
1094 case 1: /* right truncation */
1095 wrbuf_putc(term_dict, '(');
1096 if (!term_100(zh->reg->zebra_maps, reg_type,
1097 &termp, term_dict, space_split, term_dst))
1102 wrbuf_puts(term_dict, ".*)");
1104 case 2: /* left truncation */
1105 wrbuf_puts(term_dict, "(.*");
1106 if (!term_100(zh->reg->zebra_maps, reg_type,
1107 &termp, term_dict, space_split, term_dst))
1112 wrbuf_putc(term_dict, ')');
1114 case 3: /* left&right truncation */
1115 wrbuf_puts(term_dict, "(.*");
1116 if (!term_100(zh->reg->zebra_maps, reg_type,
1117 &termp, term_dict, space_split, term_dst))
1122 wrbuf_puts(term_dict, ".*)");
1124 case 101: /* process # in term */
1125 wrbuf_putc(term_dict, '(');
1126 if (!term_101(zh->reg->zebra_maps, reg_type,
1127 &termp, term_dict, space_split, term_dst))
1132 wrbuf_puts(term_dict, ")");
1134 case 102: /* Regexp-1 */
1135 wrbuf_putc(term_dict, '(');
1136 if (!term_102(zh->reg->zebra_maps, reg_type,
1137 &termp, term_dict, space_split, term_dst))
1142 wrbuf_putc(term_dict, ')');
1144 case 103: /* Regexp-2 */
1146 wrbuf_putc(term_dict, '(');
/* NOTE(review): the third argument on the continuation line below reads as
   a registered-sign character followed by "ex_range"; this looks like a
   mis-decoded address-of regex_range (term_103 takes an int *errors there
   and regex_range is passed to dict_lookup_grep afterwards) - confirm and
   repair the encoding. */
1147 if (!term_103(zh->reg->zebra_maps, reg_type,
1148 &termp, term_dict, ®ex_range,
1149 space_split, term_dst))
1154 wrbuf_putc(term_dict, ')');
1156 case 104: /* process # and ! in term */
1157 wrbuf_putc(term_dict, '(');
1158 if (!term_104(zh->reg->zebra_maps, reg_type,
1159 &termp, term_dict, space_split, term_dst))
1164 wrbuf_putc(term_dict, ')');
1166 case 105: /* process * and ! in term */
1167 wrbuf_putc(term_dict, '(');
/* 105 calls term_105 with right_truncate = 1 */
1168 if (!term_105(zh->reg->zebra_maps, reg_type,
1169 &termp, term_dict, space_split, term_dst, 1))
1174 wrbuf_putc(term_dict, ')');
1176 case 106: /* process * and ! in term */
1177 wrbuf_putc(term_dict, '(');
/* 106 calls term_105 with right_truncate = 0 */
1178 if (!term_105(zh->reg->zebra_maps, reg_type,
1179 &termp, term_dict, space_split, term_dst, 0))
1184 wrbuf_putc(term_dict, ')');
/* any other truncation value is reported as unsupported */
1187 zebra_setError_zint(zh,
1188 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
/* render the term part of the expression (ordinal prefix skipped) */
1195 const char *input = wrbuf_cstr(term_dict) + prefix_len;
1196 esc_str(buf, sizeof(buf), input, strlen(input));
1198 yaz_log(log_level_rpn, "dict_lookup_grep: %s",
1199 wrbuf_cstr(term_dict) + prefix_len);
/* run the expression; hits arrive in grep_info via the callback */
1200 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1201 grep_info, &max_pos,
1202 ord_len /* number of "exact" chars */,
1205 zebra_set_partial_result(zh);
1207 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1212 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* Releases the two heap buffers owned by grep_info: term_no and isam_p_buf.
   The grep_info structure itself is not freed. */
1218 static void grep_info_delete(struct grep_info *grep_info)
1221 xfree(grep_info->term_no);
1223 xfree(grep_info->isam_p_buf);
/*
 * Initialises grep_info for a search on the given register type.
 *
 * - Buffers start empty (term_no = 0, isam_p_buf = NULL, isam_p_size = 0).
 * - trunc_max comes from the "truncmax" resource (default "10000") and is
 *   overridden by attribute type 13 of zapt when that attribute is present.
 * - Attribute type 8 names a term set: a numeric value is formatted as a
 *   decimal name, otherwise the string value is used, and the set is created
 *   with resultSetAdd().
 */
1226 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1227 Z_AttributesPlusTerm *zapt,
1228 struct grep_info *grep_info,
1232 grep_info->term_no = 0;
1234 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1235 grep_info->isam_p_size = 0;
1236 grep_info->isam_p_buf = NULL;
1238 grep_info->reg_type = reg_type;
1239 grep_info->termset = 0;
/* per-query override of the truncation limit; -1 means attribute absent */
1245 attr_init_APT(&truncmax, zapt, 13);
1246 truncmax_value = attr_find(&truncmax, NULL);
1247 if (truncmax_value != -1)
1248 grep_info->trunc_max = truncmax_value;
1253 int termset_value_numeric;
1254 const char *termset_value_string;
1256 attr_init_APT(&termset, zapt, 8);
1257 termset_value_numeric =
1258 attr_find_ex(&termset, NULL, &termset_value_string);
/* -1 means no termset attribute was given */
1259 if (termset_value_numeric != -1)
1262 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1266 const char *termset_name = 0;
/* any value other than -2 is treated as numeric and formatted as the name;
   for -2 the string value is used instead */
1267 if (termset_value_numeric != -2)
1270 sprintf(resname, "%d", termset_value_numeric);
1271 termset_name = resname;
1274 termset_name = termset_value_string;
1275 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1276 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1277 if (!grep_info->termset)
1279 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1289 \brief Create result set(s) for list of terms
1290 \param zh Zebra Handle
1291 \param zapt Attributes Plus Term (RPN leaf)
1292 \param termz term as used in query but converted to UTF-8
1293 \param attributeSet default attribute set
1294 \param stream memory for result
1295 \param reg_type register type ('w', 'p',..)
1296 \param complete_flag whether it's phrases or not
1297 \param rank_type term flags for ranking
1298 \param xpath_use use attribute for X-Path (-1 for no X-path)
1299 \param num_bases number of databases
1300 \param basenames array of databases
1301 \param rset_nmem memory for result sets
1302 \param result_sets output result set for each term in list (output)
1303 \param num_result_sets number of output result sets
1304 \param kc rset key control to be used for created result sets
/*
 * Creates one result set per term found in termz.
 *
 * Each term is processed by term_trunc(); its result set is stored in
 * (*result_sets)[*num_result_sets] and the count is incremented. The
 * output array is taken from `stream` and enlarged by allocating a bigger
 * array and copying the existing entries. If term_trunc() fails, the
 * result sets created so far are deleted. grep_info is released on the
 * failure path and at the end.
 */
1306 static ZEBRA_RES term_list_trunc(ZebraHandle zh,
1307 Z_AttributesPlusTerm *zapt,
1309 const Odr_oid *attributeSet,
1311 int reg_type, int complete_flag,
1312 const char *rank_type,
1313 const char *xpath_use,
1314 int num_bases, char **basenames,
1316 RSET **result_sets, int *num_result_sets,
1317 struct rset_key_control *kc)
1319 char term_dst[IT_MAX_WORD+1];
1320 struct grep_info grep_info;
1321 const char *termp = termz;
1324 *num_result_sets = 0;
1326 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
/* output array full: allocate a larger one and carry the entries over */
1332 if (alloc_sets == *num_result_sets)
1335 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1338 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1339 alloc_sets = alloc_sets + add;
1340 *result_sets = rnew;
/* termp is advanced by term_trunc as terms are consumed */
1342 res = term_trunc(zh, zapt, &termp, attributeSet,
1344 reg_type, complete_flag,
1345 num_bases, basenames,
1346 term_dst, rank_type,
1347 xpath_use, rset_nmem,
1348 &(*result_sets)[*num_result_sets],
/* failure: drop every result set created so far */
1350 if (res != ZEBRA_OK)
1353 for (i = 0; i < *num_result_sets; i++)
1354 rset_delete((*result_sets)[i]);
1355 grep_info_delete (&grep_info);
/* a null result set entry is not counted */
1358 if ((*result_sets)[*num_result_sets] == 0)
1360 (*num_result_sets)++;
1365 grep_info_delete(&grep_info);
/*
 * Handles the position attribute (attribute type 3) of zapt.
 *
 * For the supported first-in-field case, the FIRST_IN_FIELD_STR marker is
 * looked up in the dictionary under the index ordinal of every database.
 * Each hit yields an isamb or isamc result set, and all of them are
 * combined with rset_create_or() into *rset. Unsupported situations are
 * reported with YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE.
 */
1369 static ZEBRA_RES rpn_search_APT_position(ZebraHandle zh,
1370 Z_AttributesPlusTerm *zapt,
1371 const Odr_oid *attributeSet,
1373 int num_bases, char **basenames,
1376 struct rset_key_control *kc)
1384 attr_init_APT(&position, zapt, 3);
1385 position_value = attr_find(&position, NULL);
1386 switch(position_value)
1395 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
/* the register type must have first-in-field support enabled in the maps */
1400 if (!zebra_maps_is_first_in_field(zh->reg->zebra_maps, reg_type))
1402 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
/* only the isamb and isamc back ends are handled */
1407 if (!zh->reg->isamb && !zh->reg->isamc)
1409 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
/* one slot per database; NOTE(review): the matching xfree is not adjacent
   to this allocation - confirm that every exit path releases f_set */
1413 f_set = xmalloc(sizeof(RSET) * num_bases);
1414 for (base_no = 0; base_no < num_bases; base_no++)
/* NOTE(review): fixed-size key buffer filled by memcpy + strcpy without a
   bounds check; relies on ord_len plus the marker staying under 100 bytes */
1418 char term_dict[100];
1423 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1425 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1426 basenames[base_no]);
1430 if (zebra_apt_get_ord(zh, zapt, reg_type, 0,
1431 attributeSet, &ord) != ZEBRA_OK)
/* dictionary key = encoded ordinal followed by the first-in-field marker */
1434 ord_len = key_SU_encode (ord, ord_buf);
1435 memcpy(term_dict, ord_buf, ord_len);
1436 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1437 val = dict_lookup(zh->reg->dict, term_dict);
/* the dictionary value is a length byte followed by the ISAM pointer */
1440 assert(*val == sizeof(ISAM_P));
1441 memcpy(&isam_p, val+1, sizeof(isam_p));
1445 f_set[num_sets++] = rsisamb_create(rset_nmem, kc, kc->scope,
1446 zh->reg->isamb, isam_p, 0);
1447 else if (zh->reg->isamc)
1448 f_set[num_sets++] = rsisamc_create(rset_nmem, kc, kc->scope,
1449 zh->reg->isamc, isam_p, 0);
/* union of the per-database result sets */
1453 *rset = rset_create_or(rset_nmem, kc, kc->scope,
1454 0 /* termid */, num_sets, f_set);
/*
 * Phrase search: builds one result set per term with term_list_trunc() and
 * combines them into *rset.
 *
 *   0 terms  -> null result set
 *   1 term   -> that term's result set
 *   several  -> ordered proximity set over all terms with distance 1
 *
 * When there is at least one term, rpn_search_APT_position() is consulted;
 * a position result set (first_set) is placed in front of the term sets in
 * a new array taken from `stream`.
 */
1460 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1461 Z_AttributesPlusTerm *zapt,
1462 const char *termz_org,
1463 const Odr_oid *attributeSet,
1465 int reg_type, int complete_flag,
1466 const char *rank_type,
1467 const char *xpath_use,
1468 int num_bases, char **basenames,
1471 struct rset_key_control *kc)
1473 RSET *result_sets = 0;
1474 int num_result_sets = 0;
1476 term_list_trunc(zh, zapt, termz_org, attributeSet,
1477 stream, reg_type, complete_flag,
1478 rank_type, xpath_use,
1479 num_bases, basenames,
1481 &result_sets, &num_result_sets, kc);
1483 if (res != ZEBRA_OK)
1486 if (num_result_sets > 0)
1489 res = rpn_search_APT_position(zh, zapt, attributeSet,
1491 num_bases, basenames,
1492 rset_nmem, &first_set,
1494 if (res != ZEBRA_OK)
/* prepend the position set: new array with one extra leading slot */
1498 RSET *nsets = nmem_malloc(stream,
1499 sizeof(RSET) * (num_result_sets+1));
1500 nsets[0] = first_set;
1501 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1502 result_sets = nsets;
1506 if (num_result_sets == 0)
1507 *rset = rset_create_null(rset_nmem, kc, 0);
1508 else if (num_result_sets == 1)
1509 *rset = result_sets[0];
1511 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1512 num_result_sets, result_sets,
1513 1 /* ordered */, 0 /* exclusion */,
1514 3 /* relation */, 1 /* distance */);
/*
 * Or-list search: builds one result set per term with term_list_trunc() and
 * combines them with rset_create_or() into *rset (null set for no terms,
 * the single set itself for one term).
 *
 * For every term rpn_search_APT_position() is consulted; when it supplies a
 * position set, the term's set is replaced by an ordered proximity set of
 * {first_set, term set} with distance 1. A position failure deletes all
 * term result sets.
 */
1520 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1521 Z_AttributesPlusTerm *zapt,
1522 const char *termz_org,
1523 const Odr_oid *attributeSet,
1525 int reg_type, int complete_flag,
1526 const char *rank_type,
1527 const char *xpath_use,
1528 int num_bases, char **basenames,
1531 struct rset_key_control *kc)
1533 RSET *result_sets = 0;
1534 int num_result_sets = 0;
1537 term_list_trunc(zh, zapt, termz_org, attributeSet,
1538 stream, reg_type, complete_flag,
1539 rank_type, xpath_use,
1540 num_bases, basenames,
1542 &result_sets, &num_result_sets, kc);
1543 if (res != ZEBRA_OK)
1546 for (i = 0; i<num_result_sets; i++)
1549 res = rpn_search_APT_position(zh, zapt, attributeSet,
1551 num_bases, basenames,
1552 rset_nmem, &first_set,
/* position lookup failed: release every term result set */
1554 if (res != ZEBRA_OK)
1556 for (i = 0; i<num_result_sets; i++)
1557 rset_delete(result_sets[i]);
/* pair the position set with this term's set */
1565 tmp_set[0] = first_set;
1566 tmp_set[1] = result_sets[i];
1568 result_sets[i] = rset_create_prox(
1569 rset_nmem, kc, kc->scope,
1571 1 /* ordered */, 0 /* exclusion */,
1572 3 /* relation */, 1 /* distance */);
1575 if (num_result_sets == 0)
1576 *rset = rset_create_null(rset_nmem, kc, 0);
1577 else if (num_result_sets == 1)
1578 *rset = result_sets[0];
1580 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1581 num_result_sets, result_sets);
/*
 * And-list search: builds one result set per term with term_list_trunc()
 * and combines them with rset_create_and() into *rset (null set for no
 * terms, the single set itself for one term).
 *
 * The per-term position handling mirrors rpn_search_APT_or_list: a position
 * set from rpn_search_APT_position() is paired with the term's set in an
 * ordered proximity set with distance 1, and a position failure deletes all
 * term result sets.
 */
1587 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1588 Z_AttributesPlusTerm *zapt,
1589 const char *termz_org,
1590 const Odr_oid *attributeSet,
1592 int reg_type, int complete_flag,
1593 const char *rank_type,
1594 const char *xpath_use,
1595 int num_bases, char **basenames,
1598 struct rset_key_control *kc)
1600 RSET *result_sets = 0;
1601 int num_result_sets = 0;
1604 term_list_trunc(zh, zapt, termz_org, attributeSet,
1605 stream, reg_type, complete_flag,
1606 rank_type, xpath_use,
1607 num_bases, basenames,
1609 &result_sets, &num_result_sets,
1611 if (res != ZEBRA_OK)
1613 for (i = 0; i<num_result_sets; i++)
1616 res = rpn_search_APT_position(zh, zapt, attributeSet,
1618 num_bases, basenames,
1619 rset_nmem, &first_set,
/* position lookup failed: release every term result set */
1621 if (res != ZEBRA_OK)
1623 for (i = 0; i<num_result_sets; i++)
1624 rset_delete(result_sets[i]);
/* pair the position set with this term's set */
1632 tmp_set[0] = first_set;
1633 tmp_set[1] = result_sets[i];
1635 result_sets[i] = rset_create_prox(
1636 rset_nmem, kc, kc->scope,
1638 1 /* ordered */, 0 /* exclusion */,
1639 3 /* relation */, 1 /* distance */);
1644 if (num_result_sets == 0)
1645 *rset = rset_create_null(rset_nmem, kc, 0);
1646 else if (num_result_sets == 1)
1647 *rset = result_sets[0];
1649 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1650 num_result_sets, result_sets);
/*
 * Numeric counterpart of string_relation: reads a number from *term_sub,
 * appends a regular expression for the requested relation (attribute type
 * 2) to term_dict and runs it with dict_lookup_grep(), collecting hits in
 * grep_info through grep_handle.
 *
 * The term is read with term_100() (space splitting on) into the scratch
 * buffer term_num and converted with atoi(). The strict relations are
 * turned into inclusive ones by shifting the border value:
 *     <   -> gen_regular_rel(value-1, islt=1)
 *     <=  -> gen_regular_rel(value,   islt=1)
 *     >=  -> gen_regular_rel(value,   islt=0)
 *     >   -> gen_regular_rel(value+1, islt=0)
 *     =   -> "(0*N)", i.e. the number with optional leading zeros
 * For an unsupported relation *error_code receives
 * YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE. term_num is destroyed on every exit
 * path shown here.
 * NOTE(review): atoi() gives no error indication for non-numeric or
 * out-of-range input.
 */
1656 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1657 const char **term_sub,
1659 const Odr_oid *attributeSet,
1660 struct grep_info *grep_info,
1670 WRBUF term_num = wrbuf_alloc();
1673 attr_init_APT(&relation, zapt, 2);
1674 relation_value = attr_find(&relation, NULL);
1676 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1678 switch (relation_value)
1681 yaz_log(log_level_rpn, "Relation <");
1682 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1,
1685 wrbuf_destroy(term_num);
1688 term_value = atoi (wrbuf_cstr(term_num));
1689 gen_regular_rel(term_dict, term_value-1, 1);
1692 yaz_log(log_level_rpn, "Relation <=");
1693 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1,
1696 wrbuf_destroy(term_num);
1699 term_value = atoi (wrbuf_cstr(term_num));
1700 gen_regular_rel(term_dict, term_value, 1);
1703 yaz_log(log_level_rpn, "Relation >=");
1704 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1,
1707 wrbuf_destroy(term_num);
1710 term_value = atoi (wrbuf_cstr(term_num));
1711 gen_regular_rel(term_dict, term_value, 0);
1714 yaz_log(log_level_rpn, "Relation >");
1715 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1,
1718 wrbuf_destroy(term_num);
1721 term_value = atoi (wrbuf_cstr(term_num));
1722 gen_regular_rel(term_dict, term_value+1, 0);
1726 yaz_log(log_level_rpn, "Relation =");
1727 if (!term_100(zh->reg->zebra_maps, reg_type, term_sub, term_num, 1,
1730 wrbuf_destroy(term_num);
1733 term_value = atoi (wrbuf_cstr(term_num));
1734 wrbuf_printf(term_dict, "(0*%d)", term_value);
1737 /* term_tmp untouched.. */
1738 while (**term_sub != '\0')
1742 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1743 wrbuf_destroy(term_num);
/* run the expression; hits are delivered to grep_handle */
1746 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1747 0, grep_info, max_pos, 0, grep_handle);
1750 zebra_set_partial_result(zh);
/* NOTE(review): the message says "rel = gt" but this lookup is shared by
   all relations */
1752 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1753 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1754 wrbuf_destroy(term_num);
/*
 * Numeric counterpart of string_term: for every database in basenames the
 * encoded index ordinal (each byte preceded by the byte 1) is written to
 * term_dict in parentheses, and numeric_relation() appends the numeric
 * expression and performs the dictionary lookup into grep_info.
 *
 * ol: receives the list of ordinals that were searched.
 * A relation error reported by numeric_relation() is forwarded with
 * zebra_setError().
 */
1758 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1759 const char **term_sub,
1761 const Odr_oid *attributeSet, NMEM stream,
1762 struct grep_info *grep_info,
1763 int reg_type, int complete_flag,
1764 int num_bases, char **basenames,
1766 const char *xpath_use,
1767 struct ord_list **ol)
1771 struct rpn_char_map_info rcmi;
1773 int bases_ok = 0; /* no of databases with OK attribute */
1775 *ol = ord_list_create(stream);
1777 rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
1779 for (base_no = 0; base_no < num_bases; base_no++)
1782 int relation_error = 0;
1783 int ord, ord_len, i;
/* a non-zero result from zebraExplain_curDatabase means the database could
   not be selected */
1788 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
1790 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
1791 basenames[base_no]);
1795 if (zebra_apt_get_ord(zh, zapt, reg_type, xpath_use,
1796 attributeSet, &ord) != ZEBRA_OK)
/* start a fresh expression for this database */
1800 wrbuf_rewind(term_dict);
1802 *ol = ord_list_append(stream, *ol, ord);
1804 ord_len = key_SU_encode (ord, ord_buf);
/* ordinal prefix: every byte is preceded by the byte 1 */
1806 wrbuf_putc(term_dict, '(');
1807 for (i = 0; i < ord_len; i++)
1809 wrbuf_putc(term_dict, 1);
1810 wrbuf_putc(term_dict, ord_buf[i]);
1812 wrbuf_putc(term_dict, ')');
1814 if (!numeric_relation(zh, zapt, &termp, term_dict,
1815 attributeSet, grep_info, &max_pos, reg_type,
1816 term_dst, &relation_error))
1820 zebra_setError(zh, relation_error, 0);
1830 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* rpn_search_APT_numeric: build the result set for a search of type
   "numeric".
   The term text (termz) is consumed through termp by numeric_term(); each
   evaluated term yields one rset_trunc() result that is stored in
   result_sets[]. At the end *rset becomes
     - a null result set when no term produced a set,
     - the single set when there is exactly one,
     - the AND of all sets otherwise.
   NOTE(review): the loop header and the error/return statements are not
   visible in this listing; the array-growth code and the indexing by
   num_result_sets indicate the term handling is repeated. */
1835 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1836 Z_AttributesPlusTerm *zapt,
1838 const Odr_oid *attributeSet,
1840 int reg_type, int complete_flag,
1841 const char *rank_type,
1842 const char *xpath_use,
1843 int num_bases, char **basenames,
1846 struct rset_key_control *kc)
1848 char term_dst[IT_MAX_WORD+1];
1849 const char *termp = termz;
1850 RSET *result_sets = 0;
1851 int num_result_sets = 0;
1853 struct grep_info grep_info;
1855 zint hits_limit_value;
1856 const char *term_ref_id_str = 0;
/* fills hits_limit_value and term_ref_id_str from zapt */
1858 term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, stream);
1860 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1861 if (grep_info_prepare(zh, zapt, &grep_info, reg_type) == ZEBRA_FAIL)
1865 struct ord_list *ol;
1866 WRBUF term_dict = wrbuf_alloc();
/* result_sets[] is full: get a larger array from stream and copy the
   existing entries over */
1867 if (alloc_sets == num_result_sets)
1870 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1873 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1874 alloc_sets = alloc_sets + add;
1877 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* reset the position count, then evaluate the next term; numeric_term
   advances termp and fills term_dst and ol */
1878 grep_info.isam_p_indx = 0;
1879 res = numeric_term(zh, zapt, &termp, term_dict,
1880 attributeSet, stream, &grep_info,
1881 reg_type, complete_flag, num_bases, basenames,
1882 term_dst, xpath_use, &ol);
1883 wrbuf_destroy(term_dict);
1884 if (res == ZEBRA_FAIL || termp == 0)
1886 yaz_log(YLOG_DEBUG, "term: %s", term_dst);
/* turn the positions collected in grep_info into a result set for
   this term */
1887 result_sets[num_result_sets] =
1888 rset_trunc(zh, grep_info.isam_p_buf,
1889 grep_info.isam_p_indx, term_dst,
1890 strlen(term_dst), rank_type,
1891 0 /* preserve position */,
1892 zapt->term->which, rset_nmem,
1893 kc, kc->scope, ol, reg_type,
1896 if (!result_sets[num_result_sets])
1902 grep_info_delete(&grep_info);
1904 if (res != ZEBRA_OK)
/* combine the per-term sets: none -> null set, one -> that set,
   several -> AND of all */
1906 if (num_result_sets == 0)
1907 *rset = rset_create_null(rset_nmem, kc, 0);
1908 else if (num_result_sets == 1)
1909 *rset = result_sets[0];
1911 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1912 num_result_sets, result_sets);
/* rpn_search_APT_local: build the result set for a search of type
   "local". The term (termz) is converted to a record system number with
   atozint() and the record is fetched with rec_get(). *rset is set either
   to a null result set or to a temporary result set into which one key is
   written.
   NOTE(review): the condition choosing between the two outcomes and the
   key initialisation are on lines not shown in this listing; presumably
   the null set is used when no record is found. */
1918 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1919 Z_AttributesPlusTerm *zapt,
1921 const Odr_oid *attributeSet,
1923 const char *rank_type, NMEM rset_nmem,
1925 struct rset_key_control *kc)
1928 zint sysno = atozint(termz);
/* fetch the record with that system number */
1932 rec = rec_get(zh->reg->records, sysno);
1940 *rset = rset_create_null(rset_nmem, kc, 0);
/* temporary result set (directory taken from the "setTmpDir" resource),
   opened for writing to receive the key */
1946 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
1947 res_get(zh->res, "setTmpDir"), 0);
1948 rsfd = rset_open(*rset, RSETF_WRITE);
1953 rset_write(rsfd, &key);
/* rpn_sort_spec: record a sort key specification taken from zapt in
   sort_sequence instead of performing a search.
   - The sort direction comes from attribute type 7: value 2 means
     descending, value 1 and any other value mean ascending.
   - The slot in sort_sequence->specs[] is the integer parsed from the
     (general) term.
   - *rset is set to a null result set.
   All allocations are made from stream. */
1959 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1960 const Odr_oid *attributeSet, NMEM stream,
1961 Z_SortKeySpecList *sort_sequence,
1962 const char *rank_type,
1965 struct rset_key_control *kc)
1968 int sort_relation_value;
1969 AttrType sort_relation_type;
/* look up attribute type 7 (used below as the sort direction) */
1974 attr_init_APT(&sort_relation_type, zapt, 7);
1975 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* first use: allocate 10 spec slots and clear them */
1977 if (!sort_sequence->specs)
1979 sort_sequence->num_specs = 10;
1980 sort_sequence->specs = (Z_SortKeySpec **)
1981 nmem_malloc(stream, sort_sequence->num_specs *
1982 sizeof(*sort_sequence->specs));
1983 for (i = 0; i<sort_sequence->num_specs; i++)
1984 sort_sequence->specs[i] = 0;
1986 if (zapt->term->which != Z_Term_general)
/* slot index parsed from the term bytes.
   NOTE(review): the visible lines only test the upper bound
   (i >= num_specs); confirm that a negative value cannot reach the
   specs[i] store below. */
1989 i = atoi_n ((char *) zapt->term->u.general->buf,
1990 zapt->term->u.general->len);
1991 if (i >= sort_sequence->num_specs)
1993 sprintf(termz, "%d", i);
/* build the spec: a generic sort element whose key is a set of sort
   attributes */
1995 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
1996 sks->sortElement = (Z_SortElement *)
1997 nmem_malloc(stream, sizeof(*sks->sortElement));
1998 sks->sortElement->which = Z_SortElement_generic;
1999 sk = sks->sortElement->u.generic = (Z_SortKey *)
2000 nmem_malloc(stream, sizeof(*sk));
2001 sk->which = Z_SortKey_sortAttributes;
2002 sk->u.sortAttributes = (Z_SortAttributes *)
2003 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
/* the attribute set OID is duplicated into stream; the attribute list
   itself is shared with zapt, not copied */
2005 sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
2006 sk->u.sortAttributes->list = zapt->attributes;
/* direction: 1 -> ascending, 2 -> descending, anything else -> ascending */
2008 sks->sortRelation = (int *)
2009 nmem_malloc(stream, sizeof(*sks->sortRelation));
2010 if (sort_relation_value == 1)
2011 *sks->sortRelation = Z_SortKeySpec_ascending;
2012 else if (sort_relation_value == 2)
2013 *sks->sortRelation = Z_SortKeySpec_descending;
2015 *sks->sortRelation = Z_SortKeySpec_ascending;
/* caseSensitivity is always stored as 0 */
2017 sks->caseSensitivity = (int *)
2018 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2019 *sks->caseSensitivity = 0;
/* the spec's own which/u choice is set to the null variant */
2021 sks->which = Z_SortKeySpec_null;
2022 sks->u.null = odr_nullval ();
/* store the spec in its slot; the operand itself yields a null set */
2023 sort_sequence->specs[i] = sks;
2024 *rset = rset_create_null(rset_nmem, kc, 0);
/* rpn_check_xpath: decide whether the use attribute (type 1) of zapt is an
   XPath expression.
   When the attribute has a string value starting with '/', the string is
   parsed by zebra_parse_xpath_str() into xpath[] (at most max steps,
   using mem) and that function's result is returned.
   NOTE(review): the value returned when the attribute is not such a
   string is on a line not shown in this listing. */
2029 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2030 const Odr_oid *attributeSet,
2031 struct xpath_location_step *xpath, int max,
2034 const Odr_oid *curAttributeSet = attributeSet;
2036 const char *use_string = 0;
/* fetch the string form of the use attribute, if any */
2038 attr_init_APT(&use, zapt, 1);
2039 attr_find_ex(&use, &curAttributeSet, &use_string);
/* not an XPath unless a string value is present and begins with '/' */
2041 if (!use_string || *use_string != '/')
2044 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/* xpath_trunc: look up `term` as a dictionary pattern in the index
   identified by (reg_type, xpath_use) and return the matches as a result
   set built by rset_trunc().
   A null result set is returned when grep_info_prepare() fails, and from
   a second early exit whose condition is not shown in this listing
   (presumably when the index ordinal lookup fails).
   `term` is appended to the pattern unchanged, so any escaping of
   pattern characters has to be done by the caller. */
2049 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2050 int reg_type, const char *term,
2051 const char *xpath_use,
2053 struct rset_key_control *kc)
2055 struct grep_info grep_info;
/* index ordinal for this register type / xpath_use string */
2056 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2057 zinfo_index_category_index,
2058 reg_type, xpath_use);
2059 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, '0') == ZEBRA_FAIL)
2060 return rset_create_null(rset_nmem, kc, 0);
2063 return rset_create_null(rset_nmem, kc, 0);
2069 WRBUF term_dict = wrbuf_alloc();
2070 int ord_len = key_SU_encode (ord, ord_buf);
2071 int term_type = Z_Term_characterString;
2072 const char *flags = "void";
/* pattern: '(' , each byte of the encoded ordinal preceded by a byte
   with value 1, ')' , followed by the term itself */
2074 wrbuf_putc(term_dict, '(');
2075 for (i = 0; i<ord_len; i++)
2077 wrbuf_putc(term_dict, 1);
2078 wrbuf_putc(term_dict, ord_buf[i]);
2080 wrbuf_putc(term_dict, ')');
2081 wrbuf_puts(term_dict, term);
/* run the dictionary lookup; grep_handle receives the matches in
   grep_info */
2083 grep_info.isam_p_indx = 0;
2084 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2085 &grep_info, &max_pos, 0, grep_handle);
2086 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2087 grep_info.isam_p_indx);
/* build the result set from the collected positions, then release the
   lookup state and the pattern buffer */
2088 rset = rset_trunc(zh, grep_info.isam_p_buf,
2089 grep_info.isam_p_indx, term, strlen(term),
2090 flags, 1, term_type, rset_nmem,
2091 kc, kc->scope, 0, reg_type, 0 /* hits_limit */,
2092 0 /* term_ref_id_str */);
2093 grep_info_delete(&grep_info);
2094 wrbuf_destroy(term_dict);
/* rpn_search_xpath: restrict a term result set (rset) to the XPath given
   by xpath[0..xpath_len-1].
   For every database and for every step, going from the last step
   towards the first, the current set is enclosed by rset_create_between()
   in the begin-tag and end-tag sets of that element path, optionally
   with an attribute set built from the step's relation predicate.
   A null rset on entry is recorded in always_matches.
   NOTE(review): several conditions, returns and the final store of the
   result are on lines not shown in this listing. */
2100 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2101 int num_bases, char **basenames,
2102 NMEM stream, const char *rank_type, RSET rset,
2103 int xpath_len, struct xpath_location_step *xpath,
2106 struct rset_key_control *kc)
2110 int always_matches = rset ? 0 : 1;
/* log the parsed steps */
2118 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2119 for (i = 0; i<xpath_len; i++)
2121 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2133 a[@attr = value]/b[@other = othervalue]
2135 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2136 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2137 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2138 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2139 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2140 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2144 dict_grep_cmap (zh->reg->dict, 0, 0);
2146 for (base_no = 0; base_no < num_bases; base_no++)
2148 int level = xpath_len;
2151 if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
2153 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2154 basenames[base_no]);
/* handle the steps from the last one down to index 0 */
2158 while (--level >= 0)
2160 WRBUF xpath_rev = wrbuf_alloc();
2162 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* xpath_rev receives the element path for this level in reverse order:
   step `level` first, step 1 last, as in the "b/a/" examples above */
2164 for (i = level; i >= 1; --i)
/* step name to pattern: one case writes the wildcard [^/]* (its condition
   is not shown; presumably for '*'), a blank is written as byte 1 plus a
   blank, every other character is copied unchanged */
2166 const char *cp = xpath[i].part;
2172 wrbuf_puts(xpath_rev, "[^/]*");
2173 else if (*cp == ' ')
2174 wrbuf_puts(xpath_rev, "\001 ");
2176 wrbuf_putc(xpath_rev, *cp);
2178 /* wrbuf_putc does not null-terminate , but
2179 wrbuf_puts below ensures it does.. so xpath_rev
2180 is OK iff length is > 0 */
2182 wrbuf_puts(xpath_rev, "/");
2184 else if (i == 1) /* // case */
2185 wrbuf_puts(xpath_rev, ".*");
/* relation predicate on this step: build "name=value" (the name without
   its first character, value characters found in REGEX_CHARS escaped
   with a backslash) and look it up in the attribute-name index */
2187 if (xpath[level].predicate &&
2188 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2189 xpath[level].predicate->u.relation.name[0])
2191 WRBUF wbuf = wrbuf_alloc();
2192 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2193 if (xpath[level].predicate->u.relation.value)
2195 const char *cp = xpath[level].predicate->u.relation.value;
2196 wrbuf_putc(wbuf, '=');
2200 if (strchr(REGEX_CHARS, *cp))
2201 wrbuf_putc(wbuf, '\\');
2202 wrbuf_putc(wbuf, *cp);
2206 rset_attr = xpath_trunc(
2207 zh, stream, '0', wrbuf_cstr(wbuf), ZEBRA_XPATH_ATTR_NAME,
2209 wrbuf_destroy(wbuf);
2215 wrbuf_destroy(xpath_rev);
2219 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2220 wrbuf_cstr(xpath_rev));
/* non-empty path: fetch the begin-tag and end-tag sets for it and
   enclose the current set between them (with rset_attr as extra
   constraint). NOTE(review): the condition under which rset is simply
   replaced by rset_start_tag is not shown; presumably always_matches. */
2221 if (wrbuf_len(xpath_rev))
2223 rset_start_tag = xpath_trunc(zh, stream, '0',
2224 wrbuf_cstr(xpath_rev),
2225 ZEBRA_XPATH_ELM_BEGIN,
2228 rset = rset_start_tag;
2231 rset_end_tag = xpath_trunc(zh, stream, '0',
2232 wrbuf_cstr(xpath_rev),
2233 ZEBRA_XPATH_ELM_END,
2236 rset = rset_create_between(rset_nmem, kc, kc->scope,
2237 rset_start_tag, rset,
2238 rset_end_tag, rset_attr);
2241 wrbuf_destroy(xpath_rev);
/* capacity of the xpath[] step array used by rpn_search_APT */
2249 #define MAX_XPATH_STEPS 10
/* rpn_search_APT: evaluate one attributes-plus-term operand (zapt) and
   return its result set through *rset.
   Steps visible here:
   - zebra_maps_attr() yields the register id, search type, rank type,
     complete flag and sort flag for zapt;
   - the term is converted to UTF-8 in termz;
   - a sort operand is delegated to rpn_sort_spec();
   - rpn_check_xpath() detects an XPath use attribute; the last step
     decides between ZEBRA_XPATH_ATTR_CDATA ('@' step) and
     ZEBRA_XPATH_CDATA;
   - relation 103 (alwaysmatches) clears *rset and goes straight to
     rpn_search_xpath();
   - otherwise search_type selects the strategy: "phrase", "and-list",
     "or-list", "local" or "numeric"; any other value sets
     YAZ_BIB1_UNSUPP_SEARCH;
   - the result is finally handed to rpn_search_xpath().
   NOTE(review): the conditions guarding several of these steps are on
   lines not shown in this listing. */
2251 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2252 const Odr_oid *attributeSet, NMEM stream,
2253 Z_SortKeySpecList *sort_sequence,
2254 int num_bases, char **basenames,
2257 struct rset_key_control *kc)
2259 ZEBRA_RES res = ZEBRA_OK;
2261 char *search_type = NULL;
2262 char rank_type[128];
2265 char termz[IT_MAX_WORD+1];
2267 const char *xpath_use = 0;
2268 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2272 log_level_rpn = yaz_log_module_level("rpn");
/* NOTE(review): "®_id" below looks like a mis-encoded "&reg_id" (reg_id
   is the variable logged and passed on later) - confirm against the
   original file. */
2275 zebra_maps_attr(zh->reg->zebra_maps, zapt, ®_id, &search_type,
2276 rank_type, &complete_flag, &sort_flag);
2278 yaz_log(YLOG_DEBUG, "reg_id=%c", reg_id);
2279 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2280 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2281 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2283 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2287 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2288 rank_type, rset_nmem, rset, kc);
2289 /* consider if an X-Path query is used */
2290 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2291 xpath, MAX_XPATH_STEPS, stream);
2294 if (xpath[xpath_len-1].part[0] == '@')
2295 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2297 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
/* relation attribute (type 2) is inspected for the alwaysmatches case */
2304 attr_init_APT(&relation, zapt, 2);
2305 relation_value = attr_find(&relation, NULL);
2307 if (relation_value == 103) /* alwaysmatches */
2309 *rset = 0; /* signal no "term" set */
2310 return rpn_search_xpath(zh, num_bases, basenames,
2311 stream, rank_type, *rset,
2312 xpath_len, xpath, rset_nmem, rset, kc);
2317 /* search using one of the various search type strategies
2318 termz is our UTF-8 search term
2319 attributeSet is top-level default attribute set
2320 stream is ODR for search
2321 reg_id is the register type
2322 complete_flag is 1 for complete subfield, 0 for incomplete
2323 xpath_use is use-attribute to be used for X-Path search, 0 for none
2325 if (!strcmp(search_type, "phrase"))
2327 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, stream,
2328 reg_id, complete_flag, rank_type,
2330 num_bases, basenames, rset_nmem,
2333 else if (!strcmp(search_type, "and-list"))
2335 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, stream,
2336 reg_id, complete_flag, rank_type,
2338 num_bases, basenames, rset_nmem,
2341 else if (!strcmp(search_type, "or-list"))
2343 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, stream,
2344 reg_id, complete_flag, rank_type,
2346 num_bases, basenames, rset_nmem,
2349 else if (!strcmp(search_type, "local"))
2351 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2352 rank_type, rset_nmem, rset, kc);
2354 else if (!strcmp(search_type, "numeric"))
2356 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream,
2357 reg_id, complete_flag, rank_type,
2359 num_bases, basenames, rset_nmem,
2364 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2367 if (res != ZEBRA_OK)
2371 return rpn_search_xpath(zh, num_bases, basenames,
2372 stream, rank_type, *rset,
2373 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration: rpn_search_structure() is called by
   rpn_search_top() before its definition appears, and it calls itself
   recursively for complex nodes. */
2376 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2377 const Odr_oid *attributeSet,
2378 NMEM stream, NMEM rset_nmem,
2379 Z_SortKeySpecList *sort_sequence,
2380 int num_bases, char **basenames,
2381 RSET **result_sets, int *num_result_sets,
2382 Z_Operator *parent_op,
2383 struct rset_key_control *kc);
/* rpn_get_top_approx_limit: walk the RPN tree rooted at zs.
   Complex nodes are descended into, first s1 and then s2, stopping as
   soon as a call does not return ZEBRA_OK. For a simple node holding an
   attributes-plus-term operand, attribute type 12 (the variable is named
   global_hits_limit_attr) is looked up.
   NOTE(review): the remaining parameter(s) and what is done with the
   value found are on lines not shown in this listing. */
2385 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2388 ZEBRA_RES res = ZEBRA_OK;
2389 if (zs->which == Z_RPNStructure_complex)
/* recurse into the left operand, then the right one */
2391 if (res == ZEBRA_OK)
2392 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2394 if (res == ZEBRA_OK)
2395 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2398 else if (zs->which == Z_RPNStructure_simple)
2400 if (zs->u.simple->which == Z_Operand_APT)
2402 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2403 AttrType global_hits_limit_attr;
/* l receives the value of attribute type 12 for this operand */
2406 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2408 l = attr_find(&global_hits_limit_attr, NULL);
/* rpn_search_top: entry point for evaluating a whole RPN query tree (zs).
   A key control object is created and rpn_search_structure() is run on
   the root with no parent operator. On failure every result set
   collected so far is deleted. On success exactly one result set is
   expected (asserted) and it is returned through *result_set. */
2416 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2417 const Odr_oid *attributeSet,
2418 NMEM stream, NMEM rset_nmem,
2419 Z_SortKeySpecList *sort_sequence,
2420 int num_bases, char **basenames,
2423 RSET *result_sets = 0;
2424 int num_result_sets = 0;
2426 struct rset_key_control *kc = zebra_key_control_create(zh);
2428 res = rpn_search_structure(zh, zs, attributeSet,
2431 num_bases, basenames,
2432 &result_sets, &num_result_sets,
2433 0 /* no parent op */,
/* failure: release whatever result sets were produced */
2435 if (res != ZEBRA_OK)
2438 for (i = 0; i<num_result_sets; i++)
2439 rset_delete(result_sets[i]);
/* success: the root must have produced exactly one non-null set */
2444 assert(num_result_sets == 1);
2445 assert(result_sets);
2446 assert(*result_sets);
2447 *result_set = *result_sets;
/* rpn_search_structure: recursively evaluate the RPN node zs and return
   its result set(s) through *result_sets / *num_result_sets (the array is
   allocated from stream).
   Complex node: both operands are evaluated; if either fails, the result
   sets collected so far are deleted. The two lists are concatenated.
   They are combined into a single set right away unless the node is an
   AND or OR whose parent (parent_op) has the same operator; in that case
   the concatenated list is passed up for the parent to combine.
   Supported operators: and, or, and-not and prox (known unit "word"
   only); anything else sets YAZ_BIB1_OPERATOR_UNSUPP.
   Simple node: an attributes-plus-term operand goes to rpn_search_APT(),
   a result set id is resolved with resultSetRef(); other operands set
   YAZ_BIB1_UNSUPP_SEARCH.
   Any other node kind sets YAZ_BIB1_UNSUPP_SEARCH.
   NOTE(review): return statements and some conditions are on lines not
   shown in this listing. */
2453 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2454 const Odr_oid *attributeSet,
2455 NMEM stream, NMEM rset_nmem,
2456 Z_SortKeySpecList *sort_sequence,
2457 int num_bases, char **basenames,
2458 RSET **result_sets, int *num_result_sets,
2459 Z_Operator *parent_op,
2460 struct rset_key_control *kc)
2462 *num_result_sets = 0;
2463 if (zs->which == Z_RPNStructure_complex)
2466 Z_Operator *zop = zs->u.complex->roperator;
2467 RSET *result_sets_l = 0;
2468 int num_result_sets_l = 0;
2469 RSET *result_sets_r = 0;
2470 int num_result_sets_r = 0;
/* left operand; on failure drop its result sets */
2472 res = rpn_search_structure(zh, zs->u.complex->s1,
2473 attributeSet, stream, rset_nmem,
2475 num_bases, basenames,
2476 &result_sets_l, &num_result_sets_l,
2478 if (res != ZEBRA_OK)
2481 for (i = 0; i<num_result_sets_l; i++)
2482 rset_delete(result_sets_l[i]);
/* right operand; on failure drop the result sets of both sides */
2485 res = rpn_search_structure(zh, zs->u.complex->s2,
2486 attributeSet, stream, rset_nmem,
2488 num_bases, basenames,
2489 &result_sets_r, &num_result_sets_r,
2491 if (res != ZEBRA_OK)
2494 for (i = 0; i<num_result_sets_l; i++)
2495 rset_delete(result_sets_l[i]);
2496 for (i = 0; i<num_result_sets_r; i++)
2497 rset_delete(result_sets_r[i]);
2501 /* make a new list of result for all children */
2502 *num_result_sets = num_result_sets_l + num_result_sets_r;
2503 *result_sets = nmem_malloc(stream, *num_result_sets *
2504 sizeof(**result_sets));
2505 memcpy(*result_sets, result_sets_l,
2506 num_result_sets_l * sizeof(**result_sets));
2507 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2508 num_result_sets_r * sizeof(**result_sets));
/* an AND/OR node under a parent with the same operator skips this
   branch, leaving the concatenated list for the parent to combine */
2510 if (!parent_op || parent_op->which != zop->which
2511 || (zop->which != Z_Operator_and &&
2512 zop->which != Z_Operator_or))
2514 /* parent node different from this one (or non-present) */
2515 /* we must combine result sets now */
2519 case Z_Operator_and:
2520 rset = rset_create_and(rset_nmem, kc,
2522 *num_result_sets, *result_sets);
2525 rset = rset_create_or(rset_nmem, kc,
2526 kc->scope, 0, /* termid */
2527 *num_result_sets, *result_sets);
2529 case Z_Operator_and_not:
2530 rset = rset_create_not(rset_nmem, kc,
/* proximity: only a "known" unit code equal to Z_ProxUnit_word is
   accepted; other cases report YAZ_BIB1_UNSUPP_PROX_UNIT_CODE */
2535 case Z_Operator_prox:
2536 if (zop->u.prox->which != Z_ProximityOperator_known)
2539 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2543 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2545 zebra_setError_zint(zh,
2546 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2547 *zop->u.prox->u.known);
2552 rset = rset_create_prox(rset_nmem, kc,
2554 *num_result_sets, *result_sets,
2555 *zop->u.prox->ordered,
2556 (!zop->u.prox->exclusion ?
2557 0 : *zop->u.prox->exclusion),
2558 *zop->u.prox->relationType,
2559 *zop->u.prox->distance );
2563 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* the combined set replaces the list: one entry holding rset */
2566 *num_result_sets = 1;
2567 *result_sets = nmem_malloc(stream, *num_result_sets *
2568 sizeof(**result_sets));
2569 (*result_sets)[0] = rset;
2572 else if (zs->which == Z_RPNStructure_simple)
/* leaf operand: either attributes plus term, or a reference to an
   existing result set */
2577 if (zs->u.simple->which == Z_Operand_APT)
2579 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2580 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2581 attributeSet, stream, sort_sequence,
2582 num_bases, basenames, rset_nmem, &rset,
2584 if (res != ZEBRA_OK)
2587 else if (zs->u.simple->which == Z_Operand_resultSetId)
2589 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2590 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2594 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2595 zs->u.simple->u.resultSetId);
2602 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a leaf yields a one-entry list holding rset */
2605 *num_result_sets = 1;
2606 *result_sets = nmem_malloc(stream, *num_result_sets *
2607 sizeof(**result_sets));
2608 (*result_sets)[0] = rset;
2612 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2623 * indent-tabs-mode: nil
2625 * vim: shiftwidth=4 tabstop=8 expandtab