1 /* This file is part of the Zebra server.
2 Copyright (C) Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <yaz/diagbib1.h>
35 #include <zebra_xpath.h>
40 static int log_level_set = 0;
41 static int log_level_rpn = 0;
43 #define TERMSET_DISABLE 1
45 static const char **rpn_char_map_handler(void *vp, const char **from, int len)
47 struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
48 const char **out = zebra_maps_input(p->zm, from, len, 0);
52 const char *outp = *out;
53 yaz_log(YLOG_LOG, "---");
56 yaz_log(YLOG_LOG, "%02X", *outp);
64 void rpn_char_map_prepare(struct zebra_register *reg, zebra_map_t zm,
65 struct rpn_char_map_info *map_info)
68 if (zebra_maps_is_icu(zm))
69 dict_grep_cmap(reg->dict, 0, 0);
71 dict_grep_cmap(reg->dict, map_info, rpn_char_map_handler);
85 const char *index_type;
/* add_isam_p: record one dictionary match in the grep_info accumulator `p`
 * (grep_handle passes its void pointer here cast to struct grep_info *).
 * Steps visible here: look up the "rpn" log level, compare isam_p_indx with
 * trunc_max - 1, enlarge isam_p_buf and term_no when the buffer is full, copy
 * the ISAM pointer that follows the length byte of `info` into the next slot,
 * then decode the term, log it and pass it to resultSetAddTerm.
 * NOTE(review): the remaining parameters, the branch bodies and the return
 * values are not visible in this excerpt -- confirm against the full source. */
89 static int add_isam_p(const char *name, const char *info,
94 log_level_rpn = yaz_log_module_level("rpn");
97 /* we may have to stop this madness.. NOTE: -1 so that if
98 truncmax == trunxlimit we do *not* generate result sets */
99 if (p->isam_p_indx >= p->trunc_max - 1)
/* Buffer full: capacity becomes 2*old + 100; the entries already stored are
 * copied into the new buffer before the old one is freed. */
102 if (p->isam_p_indx == p->isam_p_size)
104 ISAM_P *new_isam_p_buf;
108 p->isam_p_size = 2*p->isam_p_size + 100;
109 new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
113 memcpy(new_isam_p_buf, p->isam_p_buf,
114 p->isam_p_indx * sizeof(*p->isam_p_buf));
115 xfree(p->isam_p_buf);
117 p->isam_p_buf = new_isam_p_buf;
120 new_term_no = (int *) xmalloc(sizeof(*new_term_no) * p->isam_p_size);
/* NOTE(review): the destination is the new term_no array and the byte count
 * is based on the term_no element size, yet the source is p->isam_p_buf.
 * This looks like it should copy from p->term_no -- confirm and fix. */
123 memcpy(new_term_no, p->isam_p_buf,
124 p->isam_p_indx * sizeof(*p->term_no));
127 p->term_no = new_term_no;
/* info[0] must equal the size of one ISAM_P; the value itself is read from
 * info+1 into slot isam_p_indx. */
130 assert(*info == sizeof(*p->isam_p_buf));
131 memcpy(p->isam_p_buf + p->isam_p_indx, info+1, sizeof(*p->isam_p_buf));
136 char term_tmp[IT_MAX_WORD];
138 const char *index_name;
/* `name` begins with an encoded ordinal; presumably key_SU_decode returns the
 * number of bytes it consumed, since name+len is then treated as the term. */
139 int len = key_SU_decode(&ord, (const unsigned char *) name);
141 zebra_term_untrans (p->zh, p->index_type, term_tmp, name+len);
142 yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp);
143 zebraExplain_lookup_ord(p->zh->reg->zei,
144 ord, 0 /* index_type */, &db, &index_name);
145 yaz_log(log_level_rpn, "grep: db=%s index=%s", db, index_name);
147 resultSetAddTerm(p->zh, p->termset, name[len], db,
148 index_name, term_tmp);
154 static int grep_handle(char *name, const char *info, void *p)
156 return add_isam_p(name, info, (struct grep_info *) p);
159 static int term_pre(zebra_map_t zm, const char **src,
160 const char *ct1, int first)
162 const char *s1, *s0 = *src;
165 /* skip white space */
168 if (ct1 && strchr(ct1, *s0))
171 map = zebra_maps_input(zm, &s1, strlen(s1), first);
172 if (**map != *CHR_SPACE)
181 static void esc_str(char *out_buf, size_t out_size,
182 const char *in_buf, int in_size)
188 assert(out_size > 20);
190 for (k = 0; k < in_size; k++)
192 int c = in_buf[k] & 0xff;
194 if (c < 32 || c > 126)
198 sprintf(out_buf +strlen(out_buf), "%02X:%c ", c, pc);
199 if (strlen(out_buf) > out_size-20)
201 strcat(out_buf, "..");
207 #define REGEX_CHARS " ^[]()|.*+?!\"$\\"
209 static void add_non_space(const char *start, const char *end,
212 const char **map, int q_map_match)
214 size_t sz = end - start;
216 wrbuf_write(display_term, start, sz);
221 if (strchr(REGEX_CHARS, *start))
222 wrbuf_putc(term_dict, '\\');
223 wrbuf_putc(term_dict, *start);
230 esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
232 wrbuf_puts(term_dict, map[0]);
237 /* ICU sort keys seem to be of the form
238 basechars \x01 accents \x01 length
239 For now we'll just right truncate from basechars . This
240 may give false hits due to accents not being used.
242 static size_t icu_basechars(const char *buf, size_t i)
244 while (i > 0 && buf[--i] != '\x01') /* skip length */
246 while (i > 0 && buf[--i] != '\x01') /* skip accents */
248 return i; /* only basechars left */
251 static int term_102_icu(zebra_map_t zm,
252 const char **src, WRBUF term_dict, int space_split,
256 const char *s0 = *src, *s1;
262 if (*s1 == ' ' && space_split)
264 else if (*s1 && !strchr(REGEX_CHARS "-", *s1))
268 /* EOF or regex reserved char */
271 const char *res_buf = 0;
273 const char *display_buf;
276 zebra_map_tokenize_start(zm, s0, s1 - s0);
278 if (zebra_map_tokenize_next(zm, &res_buf, &res_len,
279 &display_buf, &display_len))
282 res_len = icu_basechars(res_buf, res_len);
283 for (i = 0; i < res_len; i++)
285 if (strchr(REGEX_CHARS "\\", res_buf[i]))
286 wrbuf_putc(term_dict, '\\');
288 wrbuf_putc(term_dict, '\x01');
290 wrbuf_putc(term_dict, res_buf[i]);
292 wrbuf_write(display_term, display_buf, display_len);
300 wrbuf_putc(term_dict, *s1);
301 wrbuf_putc(display_term, *s1);
308 wrbuf_puts(term_dict, "\x01\x01.*");
313 static int term_100_icu(zebra_map_t zm,
314 const char **src, WRBUF term_dict,
320 const char *res_buf = 0;
322 const char *display_buf;
325 zebra_map_tokenize_start(zm, *src, strlen(*src));
326 for (i = 0; i <= token_number; i++)
328 if (!zebra_map_tokenize_next(zm, &res_buf, &res_len,
329 &display_buf, &display_len))
332 wrbuf_write(display_term, display_buf, display_len);
335 res_len = icu_basechars(res_buf, res_len);
338 wrbuf_puts(term_dict, ".*");
339 for (i = 0; i < res_len; i++)
341 if (strchr(REGEX_CHARS "\\", res_buf[i]))
342 wrbuf_putc(term_dict, '\\');
344 wrbuf_putc(term_dict, '\x01');
346 wrbuf_putc(term_dict, res_buf[i]);
349 wrbuf_puts(term_dict, ".*");
351 wrbuf_puts(term_dict, "\x01\x01.*");
355 /* term_100: handle term, where trunc = none(no operators at all) */
356 static int term_100(zebra_map_t zm,
357 const char **src, WRBUF term_dict, int space_split,
364 const char *space_start = 0;
365 const char *space_end = 0;
367 if (!term_pre(zm, src, 0, !space_split))
374 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
377 if (**map == *CHR_SPACE)
380 else /* complete subfield only. */
382 if (**map == *CHR_SPACE)
383 { /* save space mapping for later .. */
388 else if (space_start)
389 { /* reload last space */
390 while (space_start < space_end)
392 if (strchr(REGEX_CHARS, *space_start))
393 wrbuf_putc(term_dict, '\\');
394 wrbuf_putc(display_term, *space_start);
395 wrbuf_putc(term_dict, *space_start);
400 space_start = space_end = 0;
405 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
411 /* term_101: handle term, where trunc = Process # */
412 static int term_101(zebra_map_t zm,
413 const char **src, WRBUF term_dict, int space_split,
420 if (!term_pre(zm, src, "#", !space_split))
428 wrbuf_puts(term_dict, ".*");
429 wrbuf_putc(display_term, *s0);
436 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
437 if (space_split && **map == *CHR_SPACE)
441 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
448 /* term_103: handle term, where trunc = re-2 (regular expressions) */
449 static int term_103(zebra_map_t zm, const char **src,
450 WRBUF term_dict, int *errors, int space_split,
457 if (!term_pre(zm, src, "^\\()[].*+?|", !space_split))
460 if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
461 isdigit(((const unsigned char *)s0)[1]))
463 *errors = s0[1] - '0';
470 if (strchr("^\\()[].*+?|-", *s0))
472 wrbuf_putc(display_term, *s0);
473 wrbuf_putc(term_dict, *s0);
481 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
482 if (space_split && **map == *CHR_SPACE)
486 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
494 /* term_102: handle term, where trunc = re-1 (regular expressions) */
495 static int term_102(zebra_map_t zm, const char **src,
496 WRBUF term_dict, int space_split, WRBUF display_term)
498 return term_103(zm, src, term_dict, NULL, space_split, display_term);
502 /* term_104: handle term, process ?n * # */
503 static int term_104(zebra_map_t zm, const char **src,
504 WRBUF term_dict, int space_split, WRBUF display_term)
510 if (!term_pre(zm, src, "?*#", !space_split))
518 wrbuf_putc(display_term, *s0);
520 if (*s0 >= '0' && *s0 <= '9')
523 while (*s0 >= '0' && *s0 <= '9')
525 limit = limit * 10 + (*s0 - '0');
526 wrbuf_putc(display_term, *s0);
533 wrbuf_puts(term_dict, ".?");
538 wrbuf_puts(term_dict, ".*");
544 wrbuf_puts(term_dict, ".*");
545 wrbuf_putc(display_term, *s0);
551 wrbuf_puts(term_dict, ".");
552 wrbuf_putc(display_term, *s0);
559 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
560 if (space_split && **map == *CHR_SPACE)
564 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
571 /* term_105/106: handle term, process * ! and possibly right_truncate */
572 static int term_105(zebra_map_t zm, const char **src,
573 WRBUF term_dict, int space_split,
574 WRBUF display_term, int right_truncate)
580 if (!term_pre(zm, src, "\\*!", !space_split))
588 wrbuf_puts(term_dict, ".*");
589 wrbuf_putc(display_term, *s0);
595 wrbuf_putc(term_dict, '.');
596 wrbuf_putc(display_term, *s0);
599 else if (*s0 == '\\')
602 wrbuf_puts(term_dict, "\\\\");
603 wrbuf_putc(display_term, *s0);
610 map = zebra_maps_search(zm, &s0, strlen(s0), &q_map_match);
611 if (space_split && **map == *CHR_SPACE)
615 add_non_space(s1, s0, term_dict, display_term, map, q_map_match);
619 wrbuf_puts(term_dict, ".*");
625 /* gen_regular_rel - generate regular expression from relation
626 * val: border value (inclusive)
627 * islt: 1 if <=; 0 if >=.
629 static void gen_regular_rel(WRBUF term_dict, int val, int islt)
631 char dst_buf[20*5*20]; /* assuming enough for expansion */
638 yaz_log(YLOG_DEBUG, "gen_regular_rel. val=%d, islt=%d", val, islt);
642 strcpy(dst, "(-[0-9]+|(");
650 strcpy(dst, "([0-9]+|-(");
661 sprintf(numstr, "%d", val);
662 for (w = strlen(numstr); --w >= 0; pos++)
681 strcpy(dst + dst_p, numstr);
682 dst_p = strlen(dst) - pos - 1;
710 for (i = 0; i < pos; i++)
723 /* match everything less than 10^(pos-1) */
725 for (i = 1; i < pos; i++)
726 strcat(dst, "[0-9]?");
730 /* match everything greater than 10^pos */
731 for (i = 0; i <= pos; i++)
732 strcat(dst, "[0-9]");
733 strcat(dst, "[0-9]*");
736 wrbuf_puts(term_dict, dst);
739 void string_rel_add_char(WRBUF term_p, WRBUF wsrc, int *indx)
741 const char *src = wrbuf_cstr(wsrc);
742 if (src[*indx] == '\\')
744 wrbuf_putc(term_p, src[*indx]);
747 wrbuf_putc(term_p, src[*indx]);
752 * > abc ([b-].*|a[c-].*|ab[d-].*|abc.+)
753 * ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
754 * >= abc ([b-].*|a[c-].*|ab[c-].*)
755 * ([^-a].*|a[^-b].*|ab[c-].*)
756 * < abc ([-0].*|a[-a].*|ab[-b].*)
757 * ([^a-].*|a[^b-].*|ab[^c-].*)
758 * <= abc ([-0].*|a[-a].*|ab[-b].*|abc)
759 * ([^a-].*|a[^b-].*|ab[^c-].*|abc)
761 static int string_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
762 const char **term_sub, WRBUF term_dict,
763 const Odr_oid *attributeSet,
764 zebra_map_t zm, int space_split,
771 WRBUF term_component = wrbuf_alloc();
773 attr_init_APT(&relation, zapt, 2);
774 relation_value = attr_find(&relation, NULL);
777 yaz_log(YLOG_DEBUG, "string relation value=%d", relation_value);
778 switch (relation_value)
781 if (!term_100(zm, term_sub, term_component, space_split, display_term))
783 wrbuf_destroy(term_component);
786 yaz_log(log_level_rpn, "Relation <");
788 wrbuf_putc(term_dict, '(');
789 for (i = 0; i < wrbuf_len(term_component); )
794 wrbuf_putc(term_dict, '|');
796 string_rel_add_char(term_dict, term_component, &j);
798 wrbuf_putc(term_dict, '[');
800 wrbuf_putc(term_dict, '^');
802 wrbuf_putc(term_dict, 1);
803 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
805 string_rel_add_char(term_dict, term_component, &i);
806 wrbuf_putc(term_dict, '-');
808 wrbuf_putc(term_dict, ']');
809 wrbuf_putc(term_dict, '.');
810 wrbuf_putc(term_dict, '*');
812 wrbuf_putc(term_dict, ')');
815 if (!term_100(zm, term_sub, term_component, space_split, display_term))
817 wrbuf_destroy(term_component);
820 yaz_log(log_level_rpn, "Relation <=");
822 wrbuf_putc(term_dict, '(');
823 for (i = 0; i < wrbuf_len(term_component); )
828 string_rel_add_char(term_dict, term_component, &j);
829 wrbuf_putc(term_dict, '[');
831 wrbuf_putc(term_dict, '^');
833 wrbuf_putc(term_dict, 1);
834 wrbuf_putc(term_dict, FIRST_IN_FIELD_CHAR);
836 string_rel_add_char(term_dict, term_component, &i);
837 wrbuf_putc(term_dict, '-');
839 wrbuf_putc(term_dict, ']');
840 wrbuf_putc(term_dict, '.');
841 wrbuf_putc(term_dict, '*');
843 wrbuf_putc(term_dict, '|');
845 for (i = 0; i < wrbuf_len(term_component); )
846 string_rel_add_char(term_dict, term_component, &i);
847 wrbuf_putc(term_dict, ')');
850 if (!term_100(zm, term_sub, term_component, space_split, display_term))
852 wrbuf_destroy(term_component);
855 yaz_log(log_level_rpn, "Relation >");
857 wrbuf_putc(term_dict, '(');
858 for (i = 0; i < wrbuf_len(term_component); )
863 string_rel_add_char(term_dict, term_component, &j);
864 wrbuf_putc(term_dict, '[');
866 wrbuf_putc(term_dict, '^');
867 wrbuf_putc(term_dict, '-');
868 string_rel_add_char(term_dict, term_component, &i);
870 wrbuf_putc(term_dict, ']');
871 wrbuf_putc(term_dict, '.');
872 wrbuf_putc(term_dict, '*');
874 wrbuf_putc(term_dict, '|');
876 for (i = 0; i < wrbuf_len(term_component); )
877 string_rel_add_char(term_dict, term_component, &i);
878 wrbuf_putc(term_dict, '.');
879 wrbuf_putc(term_dict, '+');
880 wrbuf_putc(term_dict, ')');
883 if (!term_100(zm, term_sub, term_component, space_split, display_term))
885 wrbuf_destroy(term_component);
888 yaz_log(log_level_rpn, "Relation >=");
890 wrbuf_putc(term_dict, '(');
891 for (i = 0; i < wrbuf_len(term_component); )
896 wrbuf_putc(term_dict, '|');
898 string_rel_add_char(term_dict, term_component, &j);
899 wrbuf_putc(term_dict, '[');
901 if (i < wrbuf_len(term_component)-1)
903 wrbuf_putc(term_dict, '^');
904 wrbuf_putc(term_dict, '-');
905 string_rel_add_char(term_dict, term_component, &i);
909 string_rel_add_char(term_dict, term_component, &i);
910 wrbuf_putc(term_dict, '-');
912 wrbuf_putc(term_dict, ']');
913 wrbuf_putc(term_dict, '.');
914 wrbuf_putc(term_dict, '*');
916 wrbuf_putc(term_dict, ')');
923 yaz_log(log_level_rpn, "Relation =");
924 if (!term_100(zm, term_sub, term_component, space_split, display_term))
926 wrbuf_destroy(term_component);
929 wrbuf_puts(term_dict, "(");
930 wrbuf_puts(term_dict, wrbuf_cstr(term_component));
931 wrbuf_puts(term_dict, ")");
934 yaz_log(log_level_rpn, "Relation always matches");
935 /* skip to end of term (we don't care what it is) */
936 while (**term_sub != '\0')
940 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
941 wrbuf_destroy(term_component);
944 wrbuf_destroy(term_component);
948 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
949 const char **term_sub,
951 const Odr_oid *attributeSet, NMEM stream,
952 struct grep_info *grep_info,
953 const char *index_type, int complete_flag,
955 const char *xpath_use,
956 struct ord_list **ol,
957 zebra_map_t zm, size_t token_number);
959 ZEBRA_RES zebra_term_limits_APT(ZebraHandle zh,
960 Z_AttributesPlusTerm *zapt,
961 zint *hits_limit_value,
962 const char **term_ref_id_str,
965 AttrType term_ref_id_attr;
966 AttrType hits_limit_attr;
968 zint hits_limit_from_attr;
970 attr_init_APT(&hits_limit_attr, zapt, 11);
971 hits_limit_from_attr = attr_find(&hits_limit_attr, NULL);
973 attr_init_APT(&term_ref_id_attr, zapt, 10);
974 term_ref_id_int = attr_find_ex(&term_ref_id_attr, NULL, term_ref_id_str);
975 if (term_ref_id_int >= 0)
977 char *res = nmem_malloc(nmem, 20);
978 sprintf(res, "%d", term_ref_id_int);
979 *term_ref_id_str = res;
981 if (hits_limit_from_attr != -1)
982 *hits_limit_value = hits_limit_from_attr;
984 yaz_log(YLOG_DEBUG, "term_limits_APT ref_id=%s limit=" ZINT_FORMAT,
985 *term_ref_id_str ? *term_ref_id_str : "none",
990 /** \brief search for term (which may be truncated)
992 static ZEBRA_RES search_term(ZebraHandle zh,
993 Z_AttributesPlusTerm *zapt,
994 const char **term_sub,
995 const Odr_oid *attributeSet,
996 zint hits_limit, NMEM stream,
997 struct grep_info *grep_info,
998 const char *index_type, int complete_flag,
999 const char *rank_type,
1000 const char *xpath_use,
1003 struct rset_key_control *kc,
1005 size_t token_number)
1008 struct ord_list *ol;
1009 zint hits_limit_value = hits_limit;
1010 const char *term_ref_id_str = 0;
1011 WRBUF term_dict = wrbuf_alloc();
1012 WRBUF display_term = wrbuf_alloc();
1014 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1016 grep_info->isam_p_indx = 0;
1017 res = string_term(zh, zapt, term_sub, term_dict,
1018 attributeSet, stream, grep_info,
1019 index_type, complete_flag,
1020 display_term, xpath_use, &ol, zm, token_number);
1021 wrbuf_destroy(term_dict);
1022 if (res == ZEBRA_OK && *term_sub)
1024 yaz_log(log_level_rpn, "term: %s", wrbuf_cstr(display_term));
1025 *rset = rset_trunc(zh, grep_info->isam_p_buf,
1026 grep_info->isam_p_indx, wrbuf_buf(display_term),
1027 wrbuf_len(display_term), rank_type,
1028 1 /* preserve pos */,
1029 zapt->term->which, rset_nmem,
1030 kc, kc->scope, ol, index_type, hits_limit_value,
1035 wrbuf_destroy(display_term);
1039 static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1040 const char **term_sub,
1042 const Odr_oid *attributeSet, NMEM stream,
1043 struct grep_info *grep_info,
1044 const char *index_type, int complete_flag,
1046 const char *xpath_use,
1047 struct ord_list **ol,
1048 zebra_map_t zm, size_t token_number)
1051 AttrType truncation;
1052 int truncation_value;
1054 struct rpn_char_map_info rcmi;
1056 int space_split = complete_flag ? 0 : 1;
1058 int regex_range = 0;
1059 int max_pos, prefix_len = 0;
1064 *ol = ord_list_create(stream);
1066 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1067 attr_init_APT(&truncation, zapt, 5);
1068 truncation_value = attr_find(&truncation, NULL);
1069 yaz_log(log_level_rpn, "truncation value %d", truncation_value);
1071 termp = *term_sub; /* start of term for each database */
1073 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1074 attributeSet, &ord) != ZEBRA_OK)
1080 wrbuf_rewind(term_dict); /* new dictionary regexp term */
1082 *ol = ord_list_append(stream, *ol, ord);
1083 ord_len = key_SU_encode(ord, ord_buf);
1085 wrbuf_putc(term_dict, '(');
1087 for (i = 0; i < ord_len; i++)
1089 wrbuf_putc(term_dict, 1); /* our internal regexp escape char */
1090 wrbuf_putc(term_dict, ord_buf[i]);
1092 wrbuf_putc(term_dict, ')');
1094 prefix_len = wrbuf_len(term_dict);
1096 if (zebra_maps_is_icu(zm))
1101 attr_init_APT(&relation, zapt, 2);
1102 relation_value = attr_find(&relation, NULL);
1103 if (relation_value == 103) /* always matches */
1104 termp += strlen(termp); /* move to end of term */
1105 else if (relation_value == 3 || relation_value == 102 || relation_value == -1)
1108 switch (truncation_value)
1110 case -1: /* not specified */
1111 case 100: /* do not truncate */
1112 if (!term_100_icu(zm, &termp, term_dict, display_term, 0, token_number))
1119 if (!term_102_icu(zm, &termp, term_dict, space_split, display_term))
1125 case 1: /* right truncation */
1126 if (!term_100_icu(zm, &termp, term_dict, display_term, 1, token_number))
1133 if (!term_100_icu(zm, &termp, term_dict, display_term, 2, token_number))
1140 if (!term_100_icu(zm, &termp, term_dict, display_term, 3, token_number))
1147 zebra_setError_zint(zh,
1148 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1155 zebra_setError_zint(zh,
1156 YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE,
1163 /* non-ICU case. using string.chr and friends */
1164 switch (truncation_value)
1166 case -1: /* not specified */
1167 case 100: /* do not truncate */
1168 if (!string_relation(zh, zapt, &termp, term_dict,
1170 zm, space_split, display_term,
1175 zebra_setError(zh, relation_error, 0);
1182 case 1: /* right truncation */
1183 wrbuf_putc(term_dict, '(');
1184 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1189 wrbuf_puts(term_dict, ".*)");
1191 case 2: /* left truncation */
1192 wrbuf_puts(term_dict, "(.*");
1193 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1198 wrbuf_putc(term_dict, ')');
1200 case 3: /* left&right truncation */
1201 wrbuf_puts(term_dict, "(.*");
1202 if (!term_100(zm, &termp, term_dict, space_split, display_term))
1207 wrbuf_puts(term_dict, ".*)");
1209 case 101: /* process # in term */
1210 wrbuf_putc(term_dict, '(');
1211 if (!term_101(zm, &termp, term_dict, space_split, display_term))
1216 wrbuf_puts(term_dict, ")");
1218 case 102: /* Regexp-1 */
1219 wrbuf_putc(term_dict, '(');
1220 if (!term_102(zm, &termp, term_dict, space_split, display_term))
1225 wrbuf_putc(term_dict, ')');
1227 case 103: /* Regexp-2 */
1229 wrbuf_putc(term_dict, '(');
1230 if (!term_103(zm, &termp, term_dict, ®ex_range,
1231 space_split, display_term))
1236 wrbuf_putc(term_dict, ')');
1238 case 104: /* process ?n * # term */
1239 wrbuf_putc(term_dict, '(');
1240 if (!term_104(zm, &termp, term_dict, space_split, display_term))
1245 wrbuf_putc(term_dict, ')');
1247 case 105: /* process * ! in term and right truncate */
1248 wrbuf_putc(term_dict, '(');
1249 if (!term_105(zm, &termp, term_dict, space_split, display_term, 1))
1254 wrbuf_putc(term_dict, ')');
1256 case 106: /* process * ! in term */
1257 wrbuf_putc(term_dict, '(');
1258 if (!term_105(zm, &termp, term_dict, space_split, display_term, 0))
1263 wrbuf_putc(term_dict, ')');
1266 zebra_setError_zint(zh,
1267 YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE,
1275 const char *input = wrbuf_cstr(term_dict) + prefix_len;
1276 esc_str(buf, sizeof(buf), input, strlen(input));
1279 WRBUF pr_wr = wrbuf_alloc();
1281 wrbuf_write_escaped(pr_wr, wrbuf_buf(term_dict), wrbuf_len(term_dict));
1282 yaz_log(YLOG_LOG, "dict_lookup_grep: %s", wrbuf_cstr(pr_wr));
1283 wrbuf_destroy(pr_wr);
1285 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), regex_range,
1286 grep_info, &max_pos,
1287 ord_len /* number of "exact" chars */,
1290 zebra_set_partial_result(zh);
1292 yaz_log(YLOG_WARN, "dict_lookup_grep fail %d", r);
1294 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
1300 static void grep_info_delete(struct grep_info *grep_info)
1303 xfree(grep_info->term_no);
1305 xfree(grep_info->isam_p_buf);
1308 static ZEBRA_RES grep_info_prepare(ZebraHandle zh,
1309 Z_AttributesPlusTerm *zapt,
1310 struct grep_info *grep_info,
1311 const char *index_type)
1314 grep_info->term_no = 0;
1316 grep_info->trunc_max = atoi(res_get_def(zh->res, "truncmax", "10000"));
1317 grep_info->isam_p_size = 0;
1318 grep_info->isam_p_buf = NULL;
1320 grep_info->index_type = index_type;
1321 grep_info->termset = 0;
1327 attr_init_APT(&truncmax, zapt, 13);
1328 truncmax_value = attr_find(&truncmax, NULL);
1329 if (truncmax_value != -1)
1330 grep_info->trunc_max = truncmax_value;
1335 int termset_value_numeric;
1336 const char *termset_value_string;
1338 attr_init_APT(&termset, zapt, 8);
1339 termset_value_numeric =
1340 attr_find_ex(&termset, NULL, &termset_value_string);
1341 if (termset_value_numeric != -1)
1344 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "termset");
1348 const char *termset_name = 0;
1349 if (termset_value_numeric != -2)
1352 sprintf(resname, "%d", termset_value_numeric);
1353 termset_name = resname;
1356 termset_name = termset_value_string;
1357 yaz_log(log_level_rpn, "creating termset set %s", termset_name);
1358 grep_info->termset = resultSetAdd(zh, termset_name, 1);
1359 if (!grep_info->termset)
1361 zebra_setError(zh, YAZ_BIB1_ILLEGAL_RESULT_SET_NAME, termset_name);
1370 static ZEBRA_RES search_terms_chrmap(ZebraHandle zh,
1371 Z_AttributesPlusTerm *zapt,
1373 const Odr_oid *attributeSet,
1376 const char *index_type, int complete_flag,
1377 const char *rank_type,
1378 const char *xpath_use,
1380 RSET **result_sets, int *num_result_sets,
1381 struct rset_key_control *kc,
1384 struct grep_info grep_info;
1385 const char *termp = termz;
1388 *num_result_sets = 0;
1389 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1395 if (alloc_sets == *num_result_sets)
1398 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1401 memcpy(rnew, *result_sets, alloc_sets * sizeof(*rnew));
1402 alloc_sets = alloc_sets + add;
1403 *result_sets = rnew;
1405 res = search_term(zh, zapt, &termp, attributeSet, hits_limit,
1407 index_type, complete_flag,
1409 xpath_use, rset_nmem,
1410 &(*result_sets)[*num_result_sets],
1413 if (res != ZEBRA_OK)
1416 for (i = 0; i < *num_result_sets; i++)
1417 rset_delete((*result_sets)[i]);
1418 grep_info_delete(&grep_info);
1421 if ((*result_sets)[*num_result_sets] == 0)
1423 (*num_result_sets)++;
1428 grep_info_delete(&grep_info);
1433 \brief Create result set(s) for list of terms
1434 \param zh Zebra Handle
1435 \param zapt Attributes Plus Term (RPN leaf)
1436 \param termz term as used in query but converted to UTF-8
1437 \param attributeSet default attribute set
1438 \param stream memory for result
1439 \param index_type register type ("w", "p",..)
1440 \param complete_flag whether it's phrases or not
1441 \param rank_type term flags for ranking
1442 \param xpath_use use attribute for X-Path (-1 for no X-path)
1443 \param rset_nmem memory for result sets
1444 \param result_sets output result set for each term in list (output)
1445 \param num_result_sets number of output result sets
1446 \param kc rset key control to be used for created result sets
1448 static ZEBRA_RES search_terms_list(ZebraHandle zh,
1449 Z_AttributesPlusTerm *zapt,
1451 const Odr_oid *attributeSet,
1454 const char *index_type, int complete_flag,
1455 const char *rank_type,
1456 const char *xpath_use,
1458 RSET **result_sets, int *num_result_sets,
1459 struct rset_key_control *kc)
1461 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1462 return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit,
1463 stream, index_type, complete_flag,
1464 rank_type, xpath_use,
1465 rset_nmem, result_sets, num_result_sets,
1470 /** \brief limit a search by position - returns result set
1472 static ZEBRA_RES search_position(ZebraHandle zh,
1473 Z_AttributesPlusTerm *zapt,
1474 const Odr_oid *attributeSet,
1475 const char *index_type,
1478 struct rset_key_control *kc)
1484 char term_dict[100];
1488 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1490 attr_init_APT(&position, zapt, 3);
1491 position_value = attr_find(&position, NULL);
1492 switch(position_value)
1501 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1507 if (!zebra_maps_is_first_in_field(zm))
1509 zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_POSITION_ATTRIBUTE,
1514 if (zebra_apt_get_ord(zh, zapt, index_type, 0,
1515 attributeSet, &ord) != ZEBRA_OK)
1519 ord_len = key_SU_encode(ord, ord_buf);
1520 memcpy(term_dict, ord_buf, ord_len);
1521 strcpy(term_dict+ord_len, FIRST_IN_FIELD_STR);
1522 val = dict_lookup(zh->reg->dict, term_dict);
1525 assert(*val == sizeof(ISAM_P));
1526 memcpy(&isam_p, val+1, sizeof(isam_p));
1528 *rset = zebra_create_rset_isam(zh, rset_nmem, kc, kc->scope,
1534 /** \brief returns result set for phrase search
1536 static ZEBRA_RES rpn_search_APT_phrase(ZebraHandle zh,
1537 Z_AttributesPlusTerm *zapt,
1538 const char *termz_org,
1539 const Odr_oid *attributeSet,
1542 const char *index_type,
1544 const char *rank_type,
1545 const char *xpath_use,
1548 struct rset_key_control *kc)
1550 RSET *result_sets = 0;
1551 int num_result_sets = 0;
1553 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1554 stream, index_type, complete_flag,
1555 rank_type, xpath_use,
1557 &result_sets, &num_result_sets, kc);
1559 if (res != ZEBRA_OK)
1562 if (num_result_sets > 0)
1565 res = search_position(zh, zapt, attributeSet,
1567 rset_nmem, &first_set,
1569 if (res != ZEBRA_OK)
1572 for (i = 0; i < num_result_sets; i++)
1573 rset_delete(result_sets[i]);
1578 RSET *nsets = nmem_malloc(stream,
1579 sizeof(RSET) * (num_result_sets+1));
1580 nsets[0] = first_set;
1581 memcpy(nsets+1, result_sets, sizeof(RSET) * num_result_sets);
1582 result_sets = nsets;
1586 if (num_result_sets == 0)
1587 *rset = rset_create_null(rset_nmem, kc, 0);
1588 else if (num_result_sets == 1)
1589 *rset = result_sets[0];
1591 *rset = rset_create_prox(rset_nmem, kc, kc->scope,
1592 num_result_sets, result_sets,
1593 1 /* ordered */, 0 /* exclusion */,
1594 3 /* relation */, 1 /* distance */);
1600 /** \brief returns result set for or-list search
1602 static ZEBRA_RES rpn_search_APT_or_list(ZebraHandle zh,
1603 Z_AttributesPlusTerm *zapt,
1604 const char *termz_org,
1605 const Odr_oid *attributeSet,
1608 const char *index_type,
1610 const char *rank_type,
1611 const char *xpath_use,
1614 struct rset_key_control *kc)
1616 RSET *result_sets = 0;
1617 int num_result_sets = 0;
1620 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1621 stream, index_type, complete_flag,
1622 rank_type, xpath_use,
1624 &result_sets, &num_result_sets, kc);
1625 if (res != ZEBRA_OK)
1628 for (i = 0; i < num_result_sets; i++)
1631 res = search_position(zh, zapt, attributeSet,
1633 rset_nmem, &first_set,
1635 if (res != ZEBRA_OK)
1637 for (i = 0; i < num_result_sets; i++)
1638 rset_delete(result_sets[i]);
1646 tmp_set[0] = first_set;
1647 tmp_set[1] = result_sets[i];
1649 result_sets[i] = rset_create_prox(
1650 rset_nmem, kc, kc->scope,
1652 1 /* ordered */, 0 /* exclusion */,
1653 3 /* relation */, 1 /* distance */);
1656 if (num_result_sets == 0)
1657 *rset = rset_create_null(rset_nmem, kc, 0);
1658 else if (num_result_sets == 1)
1659 *rset = result_sets[0];
1661 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* termid */,
1662 num_result_sets, result_sets);
1668 /** \brief returns result set for and-list search
1670 static ZEBRA_RES rpn_search_APT_and_list(ZebraHandle zh,
1671 Z_AttributesPlusTerm *zapt,
1672 const char *termz_org,
1673 const Odr_oid *attributeSet,
1676 const char *index_type,
1678 const char *rank_type,
1679 const char *xpath_use,
1682 struct rset_key_control *kc)
1684 RSET *result_sets = 0;
1685 int num_result_sets = 0;
1688 search_terms_list(zh, zapt, termz_org, attributeSet, hits_limit,
1689 stream, index_type, complete_flag,
1690 rank_type, xpath_use,
1692 &result_sets, &num_result_sets,
1694 if (res != ZEBRA_OK)
1696 for (i = 0; i < num_result_sets; i++)
1699 res = search_position(zh, zapt, attributeSet,
1701 rset_nmem, &first_set,
1703 if (res != ZEBRA_OK)
1705 for (i = 0; i < num_result_sets; i++)
1706 rset_delete(result_sets[i]);
1714 tmp_set[0] = first_set;
1715 tmp_set[1] = result_sets[i];
1717 result_sets[i] = rset_create_prox(
1718 rset_nmem, kc, kc->scope,
1720 1 /* ordered */, 0 /* exclusion */,
1721 3 /* relation */, 1 /* distance */);
1726 if (num_result_sets == 0)
1727 *rset = rset_create_null(rset_nmem, kc, 0);
1728 else if (num_result_sets == 1)
1729 *rset = result_sets[0];
1731 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1732 num_result_sets, result_sets);
/* numeric_relation: turn the relation attribute (type 2) of an APT into a
 * dictionary expression for a numeric term and run the dictionary lookup.
 *
 * The number is read from *term_sub by term_100() into the scratch buffer
 * term_num and converted with atoi(). The resulting expression is appended
 * to term_dict and handed to dict_lookup_grep(), which is given grep_info
 * and grep_handle.
 * For a relation that is not handled, *error_code is set to
 * YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE.
 * term_num is destroyed on every exit path of this function.
 * NOTE(review): numeric_term() tests the result with '!', so a zero return
 * presumably signals "no term / failure" - confirm against the return
 * statements.
 */
1738 static int numeric_relation(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1739 const char **term_sub,
1741 const Odr_oid *attributeSet,
1742 struct grep_info *grep_info,
1752 WRBUF term_num = wrbuf_alloc();
/* the relation is attribute type 2 of the APT */
1755 attr_init_APT(&relation, zapt, 2);
1756 relation_value = attr_find(&relation, NULL);
1758 yaz_log(log_level_rpn, "numeric relation value=%d", relation_value);
1760 switch (relation_value)
1763 yaz_log(log_level_rpn, "Relation <");
1764 if (!term_100(zm, term_sub, term_num, 1, display_term))
1766 wrbuf_destroy(term_num);
/* atoi() gives no error indication for a malformed number */
1769 term_value = atoi(wrbuf_cstr(term_num));
/* same call as for "<=" below, with the bound lowered by one */
1770 gen_regular_rel(term_dict, term_value-1, 1);
1773 yaz_log(log_level_rpn, "Relation <=");
1774 if (!term_100(zm, term_sub, term_num, 1, display_term))
1776 wrbuf_destroy(term_num);
1779 term_value = atoi(wrbuf_cstr(term_num));
1780 gen_regular_rel(term_dict, term_value, 1);
1783 yaz_log(log_level_rpn, "Relation >=");
1784 if (!term_100(zm, term_sub, term_num, 1, display_term))
1786 wrbuf_destroy(term_num);
1789 term_value = atoi(wrbuf_cstr(term_num));
1790 gen_regular_rel(term_dict, term_value, 0);
1793 yaz_log(log_level_rpn, "Relation >");
1794 if (!term_100(zm, term_sub, term_num, 1, display_term))
1796 wrbuf_destroy(term_num);
1799 term_value = atoi(wrbuf_cstr(term_num));
/* same call as for ">=" above, with the bound raised by one */
1800 gen_regular_rel(term_dict, term_value+1, 0);
1805 yaz_log(log_level_rpn, "Relation =");
1806 if (!term_100(zm, term_sub, term_num, 1, display_term))
1808 wrbuf_destroy(term_num);
1811 term_value = atoi(wrbuf_cstr(term_num));
/* equality: the value itself; the "0*" prefix presumably admits leading
   zeros in the indexed number - TODO confirm the dictionary regex syntax */
1812 wrbuf_printf(term_dict, "(0*%d)", term_value);
1815 /* term_tmp untouched.. */
1816 while (**term_sub != '\0')
1820 *error_code = YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE;
1821 wrbuf_destroy(term_num);
/* run the assembled expression against the register dictionary */
1824 r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict),
1825 0, grep_info, max_pos, 0, grep_handle);
1828 zebra_set_partial_result(zh);
1830 yaz_log(YLOG_WARN, "dict_lookup_grep fail, rel = gt: %d", r);
1831 yaz_log(log_level_rpn, "%d positions", grep_info->isam_p_indx);
1832 wrbuf_destroy(term_num);
/* numeric_term: prepare and run the dictionary search for one numeric term.
 *
 * Resolves the index ordinal for the APT with zebra_apt_get_ord(), records
 * it in the ord list returned through *ol (created from stream), and starts
 * term_dict with the encoded ordinal wrapped in parentheses. The relation
 * handling and the dictionary lookup are delegated to numeric_relation();
 * an error code it leaves in relation_error is passed to zebra_setError().
 */
1836 static ZEBRA_RES numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
1837 const char **term_sub,
1839 const Odr_oid *attributeSet, NMEM stream,
1840 struct grep_info *grep_info,
1841 const char *index_type, int complete_flag,
1843 const char *xpath_use,
1844 struct ord_list **ol)
1847 struct rpn_char_map_info rcmi;
1849 int relation_error = 0;
1850 int ord, ord_len, i;
1852 zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type);
1854 *ol = ord_list_create(stream);
/* install the character mapping used by the dictionary grep for this map */
1856 rpn_char_map_prepare(zh->reg, zm, &rcmi);
1860 if (zebra_apt_get_ord(zh, zapt, index_type, xpath_use,
1861 attributeSet, &ord) != ZEBRA_OK)
1866 wrbuf_rewind(term_dict);
1868 *ol = ord_list_append(stream, *ol, ord);
1870 ord_len = key_SU_encode(ord, ord_buf);
/* expression prefix: "(" + encoded ordinal + ")" */
1872 wrbuf_putc(term_dict, '(');
1873 for (i = 0; i < ord_len; i++)
/* every ordinal byte is preceded by a byte with value 1 - presumably the
   escape marker of the dictionary regex; TODO confirm */
1875 wrbuf_putc(term_dict, 1);
1876 wrbuf_putc(term_dict, ord_buf[i]);
1878 wrbuf_putc(term_dict, ')');
1880 if (!numeric_relation(zh, zapt, &termp, term_dict,
1881 attributeSet, grep_info, &max_pos, zm,
1882 display_term, &relation_error))
1886 zebra_setError(zh, relation_error, 0);
1893 yaz_log(YLOG_DEBUG, "%d positions", grep_info->isam_p_indx);
/* rpn_search_APT_numeric: evaluate an APT with search type "numeric".
 *
 * Term limits are read with zebra_term_limits_APT() and a grep_info is
 * prepared for index_type. Each term found in termz is looked up through
 * numeric_term() and converted to a result set with rset_trunc(); the sets
 * are collected in result_sets. Finally *rset becomes
 *   - a null set when no set was produced,
 *   - the single set when exactly one was produced,
 *   - the AND (rset_create_and) of all sets otherwise.
 * NOTE(review): the statement that iterates over the terms is presumed to
 * be a loop ending when numeric_term() fails or termp becomes 0 - confirm.
 */
1898 static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh,
1899 Z_AttributesPlusTerm *zapt,
1901 const Odr_oid *attributeSet,
1904 const char *index_type,
1906 const char *rank_type,
1907 const char *xpath_use,
1910 struct rset_key_control *kc)
1912 const char *termp = termz;
1913 RSET *result_sets = 0;
1914 int num_result_sets = 0;
1916 struct grep_info grep_info;
1918 zint hits_limit_value = hits_limit;
1919 const char *term_ref_id_str = 0;
1921 zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str,
1924 yaz_log(log_level_rpn, "APT_numeric t='%s'", termz);
1925 if (grep_info_prepare(zh, zapt, &grep_info, index_type) == ZEBRA_FAIL)
1929 struct ord_list *ol;
1930 WRBUF term_dict = wrbuf_alloc();
1931 WRBUF display_term = wrbuf_alloc();
/* result_sets is full: get a larger array from the stream NMEM and copy
   the existing entries into it */
1932 if (alloc_sets == num_result_sets)
1935 RSET *rnew = (RSET *) nmem_malloc(stream, (alloc_sets+add) *
1938 memcpy(rnew, result_sets, alloc_sets * sizeof(*rnew));
1939 alloc_sets = alloc_sets + add;
1942 yaz_log(YLOG_DEBUG, "APT_numeric termp=%s", termp);
/* start with an empty hit buffer for this lookup */
1943 grep_info.isam_p_indx = 0;
1944 res = numeric_term(zh, zapt, &termp, term_dict,
1945 attributeSet, stream, &grep_info,
1946 index_type, complete_flag,
1947 display_term, xpath_use, &ol);
/* term_dict is only needed while numeric_term() runs */
1948 wrbuf_destroy(term_dict);
1949 if (res == ZEBRA_FAIL || termp == 0)
1951 wrbuf_destroy(display_term);
1954 yaz_log(YLOG_DEBUG, "term: %s", wrbuf_cstr(display_term));
/* build the result set for the hits collected in grep_info */
1955 result_sets[num_result_sets] =
1956 rset_trunc(zh, grep_info.isam_p_buf,
1957 grep_info.isam_p_indx, wrbuf_buf(display_term),
1958 wrbuf_len(display_term), rank_type,
1959 0 /* preserve position */,
1960 zapt->term->which, rset_nmem,
1961 kc, kc->scope, ol, index_type,
1964 wrbuf_destroy(display_term);
1965 if (!result_sets[num_result_sets])
1971 grep_info_delete(&grep_info);
1973 if (res != ZEBRA_OK)
/* combine the per-term sets into the final result */
1975 if (num_result_sets == 0)
1976 *rset = rset_create_null(rset_nmem, kc, 0);
1977 else if (num_result_sets == 1)
1978 *rset = result_sets[0];
1980 *rset = rset_create_and(rset_nmem, kc, kc->scope,
1981 num_result_sets, result_sets);
/* rpn_search_APT_local: evaluate an APT with search type "local".
 *
 * The term is converted to a system number with atozint() and the record
 * is fetched with rec_get(). The function produces either a null result
 * set or a temporary result set (created with the "setTmpDir" resource)
 * that is opened for writing and receives one key.
 * NOTE(review): the null set is presumably the "record not found" branch -
 * confirm against the condition on rec.
 */
1987 static ZEBRA_RES rpn_search_APT_local(ZebraHandle zh,
1988 Z_AttributesPlusTerm *zapt,
1990 const Odr_oid *attributeSet,
1992 const char *rank_type, NMEM rset_nmem,
1994 struct rset_key_control *kc)
1997 zint sysno = atozint(termz);
2001 rec = rec_get(zh->reg->records, sysno);
2009 *rset = rset_create_null(rset_nmem, kc, 0);
2015 *rset = rset_create_temp(rset_nmem, kc, kc->scope,
2016 res_get(zh->res, "setTmpDir"), 0);
2017 rsfd = rset_open(*rset, RSETF_WRITE);
2022 rset_write(rsfd, &key);
/* rpn_sort_spec: record a sort key that was embedded in the query as an APT.
 *
 * The sort relation is read from attribute type 7. The term, which must be
 * a general term, is parsed as an integer index into sort_sequence->specs;
 * that table is allocated with 10 empty slots from stream on first use.
 * A new Z_SortKeySpec carrying the APT's attribute list is stored at the
 * index, and *rset is set to a null result set, so the APT itself
 * contributes no hits.
 */
2028 static ZEBRA_RES rpn_sort_spec(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2029 const Odr_oid *attributeSet, NMEM stream,
2030 Z_SortKeySpecList *sort_sequence,
2031 const char *rank_type,
2034 struct rset_key_control *kc)
2037 int sort_relation_value;
2038 AttrType sort_relation_type;
2043 attr_init_APT(&sort_relation_type, zapt, 7);
2044 sort_relation_value = attr_find(&sort_relation_type, &attributeSet);
/* first sort key seen: allocate the table and clear every slot */
2046 if (!sort_sequence->specs)
2048 sort_sequence->num_specs = 10;
2049 sort_sequence->specs = (Z_SortKeySpec **)
2050 nmem_malloc(stream, sort_sequence->num_specs *
2051 sizeof(*sort_sequence->specs));
2052 for (i = 0; i < sort_sequence->num_specs; i++)
2053 sort_sequence->specs[i] = 0;
2055 if (zapt->term->which != Z_Term_general)
/* the term text is the position of this key in the sort sequence */
2058 i = atoi_n((char *) zapt->term->u.general->buf,
2059 zapt->term->u.general->len);
/* NOTE(review): only the upper bound is tested here; confirm how an
   out-of-range (including negative) index is handled */
2060 if (i >= sort_sequence->num_specs)
2062 sprintf(termz, "%d", i);
/* build a generic sort element that uses the APT's attribute list */
2064 sks = (Z_SortKeySpec *) nmem_malloc(stream, sizeof(*sks));
2065 sks->sortElement = (Z_SortElement *)
2066 nmem_malloc(stream, sizeof(*sks->sortElement));
2067 sks->sortElement->which = Z_SortElement_generic;
2068 sk = sks->sortElement->u.generic = (Z_SortKey *)
2069 nmem_malloc(stream, sizeof(*sk));
2070 sk->which = Z_SortKey_sortAttributes;
2071 sk->u.sortAttributes = (Z_SortAttributes *)
2072 nmem_malloc(stream, sizeof(*sk->u.sortAttributes));
2074 sk->u.sortAttributes->id = odr_oiddup_nmem(stream, attributeSet);
2075 sk->u.sortAttributes->list = zapt->attributes;
/* relation value 1 -> ascending, 2 -> descending, anything else -> ascending */
2077 sks->sortRelation = (Odr_int *)
2078 nmem_malloc(stream, sizeof(*sks->sortRelation));
2079 if (sort_relation_value == 1)
2080 *sks->sortRelation = Z_SortKeySpec_ascending;
2081 else if (sort_relation_value == 2)
2082 *sks->sortRelation = Z_SortKeySpec_descending;
2084 *sks->sortRelation = Z_SortKeySpec_ascending;
2086 sks->caseSensitivity = (Odr_int *)
2087 nmem_malloc(stream, sizeof(*sks->caseSensitivity));
2088 *sks->caseSensitivity = 0;
2090 sks->which = Z_SortKeySpec_null;
2091 sks->u.null = odr_nullval ();
2092 sort_sequence->specs[i] = sks;
2093 *rset = rset_create_null(rset_nmem, kc, 0);
/* rpn_check_xpath: detect whether the use attribute (type 1) of an APT is
 * an X-Path expression.
 *
 * The attribute is fetched in string form with attr_find_ex(). If there is
 * no string, or it does not begin with '/', the APT is not treated as an
 * X-Path query. Otherwise the string is parsed by zebra_parse_xpath_str()
 * into at most max steps of xpath, and that function's result is returned.
 */
2098 static int rpn_check_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2099 const Odr_oid *attributeSet,
2100 struct xpath_location_step *xpath, int max,
2103 const Odr_oid *curAttributeSet = attributeSet;
2105 const char *use_string = 0;
2107 attr_init_APT(&use, zapt, 1);
2108 attr_find_ex(&use, &curAttributeSet, &use_string);
2110 if (!use_string || *use_string != '/')
2113 return zebra_parse_xpath_str(use_string, xpath, max, mem);
/* xpath_trunc: build a result set for an X-Path helper term.
 *
 * The index ordinal is looked up for (index_type, xpath_use) in the index
 * category. The dictionary expression is "(" + encoded ordinal + ")"
 * followed by term; its hits are collected with dict_lookup_grep() and
 * turned into a result set by rset_trunc(), with no hits limit and no
 * term reference id.
 * A null result set is returned when grep_info_prepare() fails.
 */
2118 static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
2119 const char *index_type, const char *term,
2120 const char *xpath_use,
2122 struct rset_key_control *kc)
2124 struct grep_info grep_info;
2125 int ord = zebraExplain_lookup_attr_str(zh->reg->zei,
2126 zinfo_index_category_index,
2127 index_type, xpath_use);
2128 if (grep_info_prepare(zh, 0 /* zapt */, &grep_info, "0") == ZEBRA_FAIL)
2129 return rset_create_null(rset_nmem, kc, 0);
/* NOTE(review): this second early return comes after grep_info_prepare()
   succeeded; confirm that grep_info does not need grep_info_delete() here */
2132 return rset_create_null(rset_nmem, kc, 0);
2138 WRBUF term_dict = wrbuf_alloc();
2139 int ord_len = key_SU_encode(ord, ord_buf);
2140 int term_type = Z_Term_characterString;
2141 const char *flags = "void";
/* expression prefix: "(" + encoded ordinal + ")"; each ordinal byte is
   preceded by a byte with value 1 */
2143 wrbuf_putc(term_dict, '(');
2144 for (i = 0; i < ord_len; i++)
2146 wrbuf_putc(term_dict, 1);
2147 wrbuf_putc(term_dict, ord_buf[i]);
2149 wrbuf_putc(term_dict, ')');
2150 wrbuf_puts(term_dict, term);
2152 grep_info.isam_p_indx = 0;
/* the return value of dict_lookup_grep() is not examined here */
2153 dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0,
2154 &grep_info, &max_pos, 0, grep_handle);
2155 yaz_log(YLOG_DEBUG, "%s %d positions", term,
2156 grep_info.isam_p_indx);
2157 rset = rset_trunc(zh, grep_info.isam_p_buf,
2158 grep_info.isam_p_indx, term, strlen(term),
2159 flags, 1, term_type, rset_nmem,
2160 kc, kc->scope, 0, index_type, 0 /* hits_limit */,
2161 0 /* term_ref_id_str */);
2162 grep_info_delete(&grep_info);
2163 wrbuf_destroy(term_dict);
/* rpn_search_xpath: restrict a term result set to an X-Path location.
 *
 * rset is the set found for the term; a null rset sets always_matches,
 * i.e. there is no term set to restrict. The steps in xpath are processed
 * from the last one (xpath_len - 1) down to 0. For every level a reversed
 * path expression is built in xpath_rev and used to look up the element
 * begin and end tag sets via xpath_trunc(); when the step carries a
 * relation predicate, an attribute set is looked up as well. The sets are
 * combined with rset_create_between(), or the start tag set is used
 * directly as the new rset.
 * NOTE(review): the start tag set is presumably used on its own only in
 * the always_matches case - confirm.
 */
2169 ZEBRA_RES rpn_search_xpath(ZebraHandle zh,
2170 NMEM stream, const char *rank_type, RSET rset,
2171 int xpath_len, struct xpath_location_step *xpath,
2174 struct rset_key_control *kc)
2177 int always_matches = rset ? 0 : 1;
2185 yaz_log(YLOG_DEBUG, "xpath len=%d", xpath_len);
2186 for (i = 0; i < xpath_len; i++)
2188 yaz_log(log_level_rpn, "XPATH %d %s", i, xpath[i].part);
2200 a[@attr = value]/b[@other = othervalue]
2202 /e/@a val range(e/,range(@a,freetext(w,1015,val),@a),e/)
2203 /a/b val range(b/a/,freetext(w,1016,val),b/a/)
2204 /a/b/@c val range(b/a/,range(@c,freetext(w,1016,val),@c),b/a/)
2205 /a/b[@c = y] val range(b/a/,freetext(w,1016,val),b/a/,@c = y)
2206 /a[@c = y]/b val range(a/,range(b/a/,freetext(w,1016,val),b/a/),a/,@c = y)
2207 /a[@c = x]/b[@c = y] range(a/,range(b/a/,freetext(w,1016,val),b/a/,@c = y),a/,@c = x)
2211 dict_grep_cmap(zh->reg->dict, 0, 0);
2214 int level = xpath_len;
2217 while (--level >= 0)
2219 WRBUF xpath_rev = wrbuf_alloc();
2221 RSET rset_start_tag = 0, rset_end_tag = 0, rset_attr = 0;
/* append the steps from the current level down to step 1, so the path is
   written innermost element first */
2223 for (i = level; i >= 1; --i)
2225 const char *cp = xpath[i].part;
2231 wrbuf_puts(xpath_rev, "[^/]*");
2232 else if (*cp == ' ')
2233 wrbuf_puts(xpath_rev, "\001 ");
2235 wrbuf_putc(xpath_rev, *cp);
2237 /* wrbuf_putc does not null-terminate , but
2238 wrbuf_puts below ensures it does.. so xpath_rev
2239 is OK iff length is > 0 */
2241 wrbuf_puts(xpath_rev, "/");
2243 else if (i == 1) /* // case */
2244 wrbuf_puts(xpath_rev, ".*");
/* relation predicate on this step: look up "name=value" as an attribute
   term; the first character of the name is skipped and characters of the
   value that appear in REGEX_CHARS are escaped with a backslash */
2246 if (xpath[level].predicate &&
2247 xpath[level].predicate->which == XPATH_PREDICATE_RELATION &&
2248 xpath[level].predicate->u.relation.name[0])
2250 WRBUF wbuf = wrbuf_alloc();
2251 wrbuf_puts(wbuf, xpath[level].predicate->u.relation.name+1);
2252 if (xpath[level].predicate->u.relation.value)
2254 const char *cp = xpath[level].predicate->u.relation.value;
2255 wrbuf_putc(wbuf, '=');
2259 if (strchr(REGEX_CHARS, *cp))
2260 wrbuf_putc(wbuf, '\\');
2261 wrbuf_putc(wbuf, *cp);
2265 rset_attr = xpath_trunc(
2266 zh, stream, "0", wrbuf_cstr(wbuf),
2267 ZEBRA_XPATH_ATTR_NAME,
2269 wrbuf_destroy(wbuf);
2275 wrbuf_destroy(xpath_rev);
2279 yaz_log(log_level_rpn, "xpath_rev (%d) = %s", level,
2280 wrbuf_cstr(xpath_rev));
/* only a non-empty path expression leads to a tag lookup */
2281 if (wrbuf_len(xpath_rev))
2283 rset_start_tag = xpath_trunc(zh, stream, "0",
2284 wrbuf_cstr(xpath_rev),
2285 ZEBRA_XPATH_ELM_BEGIN,
2288 rset = rset_start_tag;
2291 rset_end_tag = xpath_trunc(zh, stream, "0",
2292 wrbuf_cstr(xpath_rev),
2293 ZEBRA_XPATH_ELM_END,
/* keep the part of rset that lies between the start and end tag sets;
   rset_attr is passed along as the attribute set */
2296 rset = rset_create_between(rset_nmem, kc, kc->scope,
2297 rset_start_tag, rset,
2298 rset_end_tag, rset_attr);
2301 wrbuf_destroy(xpath_rev);
/* Capacity of the X-Path step array in rpn_search_database(); also passed
   to rpn_check_xpath() as the maximum number of steps to parse. */
2309 #define MAX_XPATH_STEPS 10
/* Forward declaration: rpn_search_APT() calls rpn_search_database(), which
   is defined after it. */
2311 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2312 Z_AttributesPlusTerm *zapt,
2313 const Odr_oid *attributeSet,
2314 zint hits_limit, NMEM stream,
2315 Z_SortKeySpecList *sort_sequence,
2318 struct rset_key_control *kc);
/* rpn_search_APT: evaluate one APT against every database in basenames.
 *
 * For each of the num_bases databases the current database is selected
 * with zebraExplain_curDatabase(); a non-zero result from that call raises
 * YAZ_BIB1_DATABASE_UNAVAILABLE. The per-database search is done by
 * rpn_search_database(), which stores its set in rsets[i]. On failure the
 * sets created so far are deleted. Otherwise, with no databases *rset is a
 * null set, and with several databases it is the OR (rset_create_or) of
 * the per-database sets.
 * The rsets array is allocated from stream.
 */
2320 static ZEBRA_RES rpn_search_APT(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
2321 const Odr_oid *attributeSet,
2322 zint hits_limit, NMEM stream,
2323 Z_SortKeySpecList *sort_sequence,
2324 int num_bases, const char **basenames,
2327 struct rset_key_control *kc)
2329 RSET *rsets = nmem_malloc(stream, num_bases * sizeof(*rsets));
2330 ZEBRA_RES res = ZEBRA_OK;
2332 for (i = 0; i < num_bases; i++)
2335 if (zebraExplain_curDatabase(zh->reg->zei, basenames[i]))
2337 zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE,
2342 res = rpn_search_database(zh, zapt, attributeSet, hits_limit, stream,
2344 rset_nmem, rsets+i, kc);
2345 if (res != ZEBRA_OK)
2348 if (res != ZEBRA_OK)
2349 { /* must clean up the already created sets */
2351 rset_delete(rsets[i]);
2358 else if (num_bases == 0)
2359 *rset = rset_create_null(rset_nmem, kc, 0);
2361 *rset = rset_create_or(rset_nmem, kc, kc->scope, 0 /* TERMID */,
/* rpn_search_database: evaluate one APT against the current database.
 *
 * Steps:
 *  - zebra_maps_attr() derives index_type, search_type, rank_type,
 *    complete_flag and sort_flag from the APT's attributes;
 *  - the term is converted to UTF-8 in termz by zapt_term_to_utf8();
 *  - a sort APT is handed to rpn_sort_spec();
 *  - rpn_check_xpath() decides whether the use attribute is an X-Path
 *    expression. If so, xpath_use selects attribute cdata when the last
 *    step begins with '@' and element cdata otherwise, and relation value
 *    103 (alwaysmatches) goes straight to rpn_search_xpath() with no term
 *    set;
 *  - otherwise the search is dispatched on search_type: "phrase",
 *    "and-list", "or-list", "local" or "numeric". Any other value raises
 *    YAZ_BIB1_UNSUPP_SEARCH;
 *  - the term set is finally passed through rpn_search_xpath().
 */
2367 static ZEBRA_RES rpn_search_database(ZebraHandle zh,
2368 Z_AttributesPlusTerm *zapt,
2369 const Odr_oid *attributeSet,
2370 zint hits_limit, NMEM stream,
2371 Z_SortKeySpecList *sort_sequence,
2374 struct rset_key_control *kc)
2376 ZEBRA_RES res = ZEBRA_OK;
2377 const char *index_type;
2378 char *search_type = NULL;
2379 char rank_type[128];
2382 char termz[IT_MAX_WORD+1];
2384 const char *xpath_use = 0;
2385 struct xpath_location_step xpath[MAX_XPATH_STEPS];
2389 log_level_rpn = yaz_log_module_level("rpn");
2392 zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type,
2393 rank_type, &complete_flag, &sort_flag);
2395 yaz_log(YLOG_DEBUG, "index_type=%s", index_type);
2396 yaz_log(YLOG_DEBUG, "complete_flag=%d", complete_flag);
2397 yaz_log(YLOG_DEBUG, "search_type=%s", search_type);
2398 yaz_log(YLOG_DEBUG, "rank_type=%s", rank_type);
2400 if (zapt_term_to_utf8(zh, zapt, termz) == ZEBRA_FAIL)
2404 return rpn_sort_spec(zh, zapt, attributeSet, stream, sort_sequence,
2405 rank_type, rset_nmem, rset, kc);
2406 /* consider if an X-Path query is used */
2407 xpath_len = rpn_check_xpath(zh, zapt, attributeSet,
2408 xpath, MAX_XPATH_STEPS, stream);
/* NOTE(review): indexing with xpath_len-1 needs xpath_len >= 1; confirm
   that the guarding condition excludes 0 */
2411 if (xpath[xpath_len-1].part[0] == '@')
2412 xpath_use = ZEBRA_XPATH_ATTR_CDATA; /* last step is attribute */
2414 xpath_use = ZEBRA_XPATH_CDATA; /* searching for cdata */
2421 attr_init_APT(&relation, zapt, 2);
2422 relation_value = attr_find(&relation, NULL);
2424 if (relation_value == 103) /* alwaysmatches */
2426 *rset = 0; /* signal no "term" set */
2427 return rpn_search_xpath(zh, stream, rank_type, *rset,
2428 xpath_len, xpath, rset_nmem, rset, kc);
2433 /* search using one of the various search type strategies
2434 termz is our UTF-8 search term
2435 attributeSet is top-level default attribute set
2436 stream is ODR for search
2437 reg_id is the register type
2438 complete_flag is 1 for complete subfield, 0 for incomplete
2439 xpath_use is use-attribute to be used for X-Path search, 0 for none
2441 if (!strcmp(search_type, "phrase"))
2443 res = rpn_search_APT_phrase(zh, zapt, termz, attributeSet, hits_limit,
2445 index_type, complete_flag, rank_type,
2450 else if (!strcmp(search_type, "and-list"))
2452 res = rpn_search_APT_and_list(zh, zapt, termz, attributeSet, hits_limit,
2454 index_type, complete_flag, rank_type,
2459 else if (!strcmp(search_type, "or-list"))
2461 res = rpn_search_APT_or_list(zh, zapt, termz, attributeSet, hits_limit,
2463 index_type, complete_flag, rank_type,
2468 else if (!strcmp(search_type, "local"))
2470 res = rpn_search_APT_local(zh, zapt, termz, attributeSet, stream,
2471 rank_type, rset_nmem, rset, kc);
2473 else if (!strcmp(search_type, "numeric"))
2475 res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, hits_limit,
2477 index_type, complete_flag, rank_type,
2484 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2487 if (res != ZEBRA_OK)
2491 return rpn_search_xpath(zh, stream, rank_type, *rset,
2492 xpath_len, xpath, rset_nmem, rset, kc);
/* Forward declaration of the recursive RPN evaluator, needed by
   rpn_search_top(). The definition further down omits "static" but keeps
   internal linkage because this earlier declaration is visible there. */
2495 static ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2496 const Odr_oid *attributeSet,
2498 NMEM stream, NMEM rset_nmem,
2499 Z_SortKeySpecList *sort_sequence,
2500 int num_bases, const char **basenames,
2501 RSET **result_sets, int *num_result_sets,
2502 Z_Operator *parent_op,
2503 struct rset_key_control *kc);
/* rpn_get_top_approx_limit: walk an RPN tree looking at attribute type 12
 * of every APT operand.
 *
 * Complex nodes are handled by recursing into s1 and then s2, stopping at
 * the first result that is not ZEBRA_OK. For a simple APT operand the
 * value of attribute type 12 is fetched with attr_find().
 * NOTE(review): the value found is presumably stored as the approximation
 * limit that the function name refers to - confirm.
 */
2505 ZEBRA_RES rpn_get_top_approx_limit(ZebraHandle zh, Z_RPNStructure *zs,
2508 ZEBRA_RES res = ZEBRA_OK;
2509 if (zs->which == Z_RPNStructure_complex)
2511 if (res == ZEBRA_OK)
2512 res = rpn_get_top_approx_limit(zh, zs->u.complex->s1,
2514 if (res == ZEBRA_OK)
2515 res = rpn_get_top_approx_limit(zh, zs->u.complex->s2,
2518 else if (zs->which == Z_RPNStructure_simple)
2520 if (zs->u.simple->which == Z_Operand_APT)
2522 Z_AttributesPlusTerm *zapt = zs->u.simple->u.attributesPlusTerm;
2523 AttrType global_hits_limit_attr;
2526 attr_init_APT(&global_hits_limit_attr, zapt, 12);
2528 l = attr_find(&global_hits_limit_attr, NULL);
/* rpn_search_top: entry point for evaluating a complete RPN query tree.
 *
 * A key control object is created with zebra_key_control_create() and the
 * tree is evaluated by rpn_search_structure() with no parent operator.
 * On failure every result set returned so far is deleted. On success the
 * evaluation must have produced exactly one non-null result set (checked
 * with assert), which is returned through *result_set.
 */
2536 ZEBRA_RES rpn_search_top(ZebraHandle zh, Z_RPNStructure *zs,
2537 const Odr_oid *attributeSet,
2539 NMEM stream, NMEM rset_nmem,
2540 Z_SortKeySpecList *sort_sequence,
2541 int num_bases, const char **basenames,
2544 RSET *result_sets = 0;
2545 int num_result_sets = 0;
2547 struct rset_key_control *kc = zebra_key_control_create(zh);
2549 res = rpn_search_structure(zh, zs, attributeSet, hits_limit,
2552 num_bases, basenames,
2553 &result_sets, &num_result_sets,
2554 0 /* no parent op */,
2556 if (res != ZEBRA_OK)
2559 for (i = 0; i < num_result_sets; i++)
2560 rset_delete(result_sets[i]);
2565 assert(num_result_sets == 1);
2566 assert(result_sets);
2567 assert(*result_sets);
2568 *result_set = *result_sets;
/* rpn_search_structure: recursively evaluate an RPN query node.
 *
 * The result is a list of result sets returned through *result_sets and
 * *num_result_sets; the arrays are allocated from stream.
 *
 * Complex node: both operands are evaluated recursively. If the second one
 * fails, the sets of both sides are deleted. The two lists are then
 * concatenated. When the parent operator is the same AND or OR as this
 * node, the concatenated list is passed up unchanged so the parent can
 * combine all operands at once. Otherwise the list is combined here into
 * one set:
 *   and      -> rset_create_and
 *   or       -> rset_create_or
 *   and-not  -> rset_create_not
 *   prox     -> rset_create_prox; a unit that is not a known unit, or a
 *               known unit other than word, raises
 *               YAZ_BIB1_UNSUPP_PROX_UNIT_CODE
 *   other    -> YAZ_BIB1_OPERATOR_UNSUPP
 *
 * Simple node: an APT operand is evaluated by rpn_search_APT(), and a
 * result set id is resolved with resultSetRef(), which can raise
 * YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST. Any other operand raises
 * YAZ_BIB1_UNSUPP_SEARCH. The outcome is a one-element list.
 *
 * Any other node type raises YAZ_BIB1_UNSUPP_SEARCH.
 */
2574 ZEBRA_RES rpn_search_structure(ZebraHandle zh, Z_RPNStructure *zs,
2575 const Odr_oid *attributeSet, zint hits_limit,
2576 NMEM stream, NMEM rset_nmem,
2577 Z_SortKeySpecList *sort_sequence,
2578 int num_bases, const char **basenames,
2579 RSET **result_sets, int *num_result_sets,
2580 Z_Operator *parent_op,
2581 struct rset_key_control *kc)
2583 *num_result_sets = 0;
2584 if (zs->which == Z_RPNStructure_complex)
2587 Z_Operator *zop = zs->u.complex->roperator;
2588 RSET *result_sets_l = 0;
2589 int num_result_sets_l = 0;
2590 RSET *result_sets_r = 0;
2591 int num_result_sets_r = 0;
/* left operand */
2593 res = rpn_search_structure(zh, zs->u.complex->s1,
2594 attributeSet, hits_limit, stream, rset_nmem,
2596 num_bases, basenames,
2597 &result_sets_l, &num_result_sets_l,
2599 if (res != ZEBRA_OK)
2602 for (i = 0; i < num_result_sets_l; i++)
2603 rset_delete(result_sets_l[i]);
/* right operand; on failure the sets of both sides are released */
2606 res = rpn_search_structure(zh, zs->u.complex->s2,
2607 attributeSet, hits_limit, stream, rset_nmem,
2609 num_bases, basenames,
2610 &result_sets_r, &num_result_sets_r,
2612 if (res != ZEBRA_OK)
2615 for (i = 0; i < num_result_sets_l; i++)
2616 rset_delete(result_sets_l[i]);
2617 for (i = 0; i < num_result_sets_r; i++)
2618 rset_delete(result_sets_r[i]);
2622 /* make a new list of result for all children */
2623 *num_result_sets = num_result_sets_l + num_result_sets_r;
2624 *result_sets = nmem_malloc(stream, *num_result_sets *
2625 sizeof(**result_sets));
2626 memcpy(*result_sets, result_sets_l,
2627 num_result_sets_l * sizeof(**result_sets));
2628 memcpy(*result_sets + num_result_sets_l, result_sets_r,
2629 num_result_sets_r * sizeof(**result_sets));
/* combining is deferred only when the parent is the same AND/OR operator */
2631 if (!parent_op || parent_op->which != zop->which
2632 || (zop->which != Z_Operator_and &&
2633 zop->which != Z_Operator_or))
2635 /* parent node different from this one (or non-present) */
2636 /* we must combine result sets now */
2640 case Z_Operator_and:
2641 rset = rset_create_and(rset_nmem, kc,
2643 *num_result_sets, *result_sets);
2646 rset = rset_create_or(rset_nmem, kc,
2647 kc->scope, 0, /* termid */
2648 *num_result_sets, *result_sets);
2650 case Z_Operator_and_not:
2651 rset = rset_create_not(rset_nmem, kc,
2656 case Z_Operator_prox:
/* only a known proximity unit is accepted, and of those only word */
2657 if (zop->u.prox->which != Z_ProximityOperator_known)
2660 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2664 if (*zop->u.prox->u.known != Z_ProxUnit_word)
2666 zebra_setError_zint(zh,
2667 YAZ_BIB1_UNSUPP_PROX_UNIT_CODE,
2668 *zop->u.prox->u.known);
/* a missing exclusion flag is treated as 0 */
2673 rset = rset_create_prox(rset_nmem, kc,
2675 *num_result_sets, *result_sets,
2676 *zop->u.prox->ordered,
2677 (!zop->u.prox->exclusion ?
2678 0 : *zop->u.prox->exclusion),
2679 *zop->u.prox->relationType,
2680 *zop->u.prox->distance );
2684 zebra_setError(zh, YAZ_BIB1_OPERATOR_UNSUPP, 0);
/* the combined set replaces the list as its only element */
2687 *num_result_sets = 1;
2688 *result_sets = nmem_malloc(stream, *num_result_sets *
2689 sizeof(**result_sets));
2690 (*result_sets)[0] = rset;
2693 else if (zs->which == Z_RPNStructure_simple)
2698 if (zs->u.simple->which == Z_Operand_APT)
2700 yaz_log(YLOG_DEBUG, "rpn_search_APT");
2701 res = rpn_search_APT(zh, zs->u.simple->u.attributesPlusTerm,
2702 attributeSet, hits_limit,
2703 stream, sort_sequence,
2704 num_bases, basenames, rset_nmem, &rset,
2706 if (res != ZEBRA_OK)
2709 else if (zs->u.simple->which == Z_Operand_resultSetId)
2711 yaz_log(YLOG_DEBUG, "rpn_search_ref");
2712 rset = resultSetRef(zh, zs->u.simple->u.resultSetId);
2716 YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
2717 zs->u.simple->u.resultSetId);
2724 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
/* a simple operand always yields a one-element list */
2727 *num_result_sets = 1;
2728 *result_sets = nmem_malloc(stream, *num_result_sets *
2729 sizeof(**result_sets));
2730 (*result_sets)[0] = rset;
2734 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, 0);
2745 * c-file-style: "Stroustrup"
2746 * indent-tabs-mode: nil
2748 * vim: shiftwidth=4 tabstop=8 expandtab