X-Git-Url: http://lists.indexdata.dk/cgi-bin?a=blobdiff_plain;ds=sidebyside;f=index%2Frpnsearch.c;h=8b474a99af42e9118a73ffe723d78dc48e7ce4d2;hb=85ad68ab178a261dc548284ee68aae9107cbfaaf;hp=6eb12e6029a471d67ecfe97b59030711bf6bd2da;hpb=a030c87bc444608639905eca95e29f84a4f1d991;p=idzebra-moved-to-github.git diff --git a/index/rpnsearch.c b/index/rpnsearch.c index 6eb12e6..8b474a9 100644 --- a/index/rpnsearch.c +++ b/index/rpnsearch.c @@ -17,6 +17,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#if HAVE_CONFIG_H +#include +#endif #include #include #ifdef WIN32 @@ -231,24 +234,113 @@ static void add_non_space(const char *start, const char *end, } +static int term_102_icu(zebra_map_t zm, + const char **src, WRBUF term_dict, int space_split, + WRBUF display_term) +{ + int no_terms = 0; + const char *s0 = *src, *s1; + while (*s0 == ' ') + s0++; + s1 = s0; + for (;;) + { + if (*s1 == ' ' && space_split) + break; + else if (*s1 && !strchr(REGEX_CHARS "-", *s1)) + s1++; + else + { + /* EOF or regex reserved char */ + if (s0 != s1) + { + const char *res_buf = 0; + size_t res_len = 0; + const char *display_buf; + size_t display_len; + + zebra_map_tokenize_start(zm, s0, s1 - s0); + + if (zebra_map_tokenize_next(zm, &res_buf, &res_len, + &display_buf, &display_len)) + { + size_t i = res_len; + while (--i >= 0 && res_buf[i] != '\x01') + ; + if (i > 0) + { + while (--i >= 0 && res_buf[i] != '\x01') + ; + } + res_len = i; /* reduce res_len */ + for (i = 0; i < res_len; i++) + { + if (strchr(REGEX_CHARS "\\", res_buf[i])) + wrbuf_putc(term_dict, '\\'); + if (res_buf[i] < 32) + wrbuf_putc(term_dict, '\x01'); + + wrbuf_putc(term_dict, res_buf[i]); + } + wrbuf_write(display_term, display_buf, display_len); + + no_terms++; + } + } + if (*s1 == '\0') + break; + + wrbuf_putc(term_dict, *s1); + wrbuf_putc(display_term, *s1); + + s1++; + s0 = s1; + } + } + if (no_terms) + wrbuf_puts(term_dict, "\x01\x01.*"); + *src = s1; + return no_terms; +} + static int term_100_icu(zebra_map_t zm, const char **src, WRBUF term_dict, int space_split, WRBUF display_term, - int right_trunc) + int mode) { - int i; + size_t i; const char *res_buf = 0; size_t res_len = 0; const char *display_buf; size_t display_len; + const char *s0 = *src, *s1; + + while (*s0 == ' ') + s0++; + + if (*s0 == '\0') + return 0; + + if (space_split) + { + s1 = s0; + while (*s1 && *s1 != ' ') + s1++; + } + else + s1 = s0 + strlen(s0); + + *src = s1; + + zebra_map_tokenize_start(zm, s0, s1 - s0); + if (!zebra_map_tokenize_next(zm, &res_buf, &res_len, &display_buf, &display_len)) { - *src += strlen(*src); return 0; } wrbuf_write(display_term, display_buf, display_len); - if (right_trunc) + if (mode) { /* ICU sort keys seem to be of the form basechars \x01 accents \x01 length @@ -269,17 +361,21 @@ static int term_100_icu(zebra_map_t zm, } res_len = i; /* reduce res_len */ } + if (mode & 2) + wrbuf_puts(term_dict, ".*"); for (i = 0; i < res_len; i++) { if (strchr(REGEX_CHARS "\\", res_buf[i])) wrbuf_putc(term_dict, '\\'); if (res_buf[i] < 32) - wrbuf_putc(term_dict, 1); - + wrbuf_putc(term_dict, '\x01'); + wrbuf_putc(term_dict, res_buf[i]); } - if (right_trunc) + if (mode & 1) wrbuf_puts(term_dict, ".*"); + else if (mode) + wrbuf_puts(term_dict, "\x01\x01.*"); return 1; } @@ -430,7 +526,7 @@ static int term_102(zebra_map_t zm, const char **src, } -/* term_104: handle term, process # and ! */ +/* term_104: handle term, process ?n * # */ static int term_104(zebra_map_t zm, const char **src, WRBUF term_dict, int space_split, WRBUF display_term) { @@ -499,7 +595,7 @@ static int term_104(zebra_map_t zm, const char **src, return i; } -/* term_105/106: handle term, where trunc = Process * and ! and right trunc */ +/* term_105/106: handle term, process * ! and possibly right_truncate */ static int term_105(zebra_map_t zm, const char **src, WRBUF term_dict, int space_split, WRBUF display_term, int right_truncate) @@ -1045,6 +1141,13 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, return ZEBRA_OK; } break; + case 102: + if (!term_102_icu(zm, &termp, term_dict, space_split, display_term)) + { + *term_sub = 0; + return ZEBRA_OK; + } + break; case 1: /* right truncation */ if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 1)) { @@ -1052,6 +1155,20 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, return ZEBRA_OK; } break; + case 2: + if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 2)) + { + *term_sub = 0; + return ZEBRA_OK; + } + break; + case 3: + if (!term_100_icu(zm, &termp, term_dict, space_split, display_term, 3)) + { + *term_sub = 0; + return ZEBRA_OK; + } + break; default: zebra_setError_zint(zh, YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE, @@ -1144,7 +1261,7 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, } wrbuf_putc(term_dict, ')'); break; - case 104: /* process # and ! in term */ + case 104: /* process ?n * # term */ wrbuf_putc(term_dict, '('); if (!term_104(zm, &termp, term_dict, space_split, display_term)) { @@ -1153,7 +1270,7 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, } wrbuf_putc(term_dict, ')'); break; - case 105: /* process * and ! in term */ + case 105: /* process * ! in term and right truncate */ wrbuf_putc(term_dict, '('); if (!term_105(zm, &termp, term_dict, space_split, display_term, 1)) { @@ -1162,7 +1279,7 @@ static ZEBRA_RES string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, } wrbuf_putc(term_dict, ')'); break; - case 106: /* process * and ! in term */ + case 106: /* process * ! in term */ wrbuf_putc(term_dict, '('); if (!term_105(zm, &termp, term_dict, space_split, display_term, 0)) { @@ -1367,8 +1484,6 @@ static ZEBRA_RES search_terms_list(ZebraHandle zh, struct rset_key_control *kc) { zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type); - if (zebra_maps_is_icu(zm)) - zebra_map_tokenize_start(zm, termz, strlen(termz)); return search_terms_chrmap(zh, zapt, termz, attributeSet, hits_limit, stream, index_type, complete_flag, rank_type, xpath_use, @@ -1808,6 +1923,7 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, Z_AttributesPlusTerm *zapt, const char *termz, const Odr_oid *attributeSet, + zint hits_limit, NMEM stream, const char *index_type, int complete_flag, @@ -1823,7 +1939,7 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, ZEBRA_RES res; struct grep_info grep_info; int alloc_sets = 0; - zint hits_limit_value; + zint hits_limit_value = hits_limit; const char *term_ref_id_str = 0; zebra_term_limits_APT(zh, zapt, &hits_limit_value, &term_ref_id_str, @@ -2040,7 +2156,7 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream, return rset_create_null(rset_nmem, kc, 0); else { - int i, r, max_pos; + int i, max_pos; char ord_buf[32]; RSET rset; WRBUF term_dict = wrbuf_alloc(); @@ -2058,8 +2174,8 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream, wrbuf_puts(term_dict, term); grep_info.isam_p_indx = 0; - r = dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0, - &grep_info, &max_pos, 0, grep_handle); + dict_lookup_grep(zh->reg->dict, wrbuf_cstr(term_dict), 0, + &grep_info, &max_pos, 0, grep_handle); yaz_log(YLOG_DEBUG, "%s %d positions", term, grep_info.isam_p_indx); rset = rset_trunc(zh, grep_info.isam_p_buf, @@ -2380,7 +2496,8 @@ static ZEBRA_RES rpn_search_database(ZebraHandle zh, } else if (!strcmp(search_type, "numeric")) { - res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, stream, + res = rpn_search_APT_numeric(zh, zapt, termz, attributeSet, hits_limit, + stream, index_type, complete_flag, rank_type, xpath_use, rset_nmem,