From 33ae4eec22d719299cb2c4e2a3853bde0c1f5794 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Mon, 7 Feb 2011 11:14:33 +0000 Subject: [PATCH] Verbose level for register check --- include/idzebra/api.h | 2 +- index/extract.c | 97 ++++++++++++++++++++++++----------------------- index/index.h | 3 ++ index/zebraapi.c | 100 +++++++++++++++++++++++++++++++++++-------------- index/zebraidx.c | 9 ++++- 5 files changed, 133 insertions(+), 78 deletions(-) diff --git a/include/idzebra/api.h b/include/idzebra/api.h index 82bf7ce..c962f8f 100644 --- a/include/idzebra/api.h +++ b/include/idzebra/api.h @@ -511,7 +511,7 @@ ZEBRA_RES zebra_set_break_handler(ZebraHandle zh, void *client_data); YAZ_EXPORT -ZEBRA_RES zebra_register_check(ZebraHandle zh); +ZEBRA_RES zebra_register_check(ZebraHandle zh, int verbose_level); YAZ_END_CDECL diff --git a/index/extract.c b/index/extract.c index a3315ee..8978a53 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1333,6 +1333,56 @@ ZEBRA_RES zebra_extract_explain(void *handle, Record rec, data1_node *n) return ZEBRA_OK; } +void zebra_it_key_str_dump(ZebraHandle zh, struct it_key *key, + const char *str, size_t slen, NMEM nmem, int level) +{ + char keystr[200]; /* room for zints to print */ + char *dst_term = 0; + int ord = CAST_ZINT_TO_INT(key->mem[0]); + const char *index_type; + int i; + const char *string_index; + + zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, + 0/* db */, &string_index); + assert(index_type); + zebra_term_untrans_iconv(zh, nmem, index_type, + &dst_term, str); + *keystr = '\0'; + for (i = 0; i < key->len; i++) + { + sprintf(keystr + strlen(keystr), ZINT_FORMAT " ", key->mem[i]); + } + + if (*str < CHR_BASE_CHAR) + { + int i; + char dst_buf[200]; /* room for special chars */ + + strcpy(dst_buf , "?"); + + if (!strcmp(str, "")) + strcpy(dst_buf, "alwaysmatches"); + if (!strcmp(str, FIRST_IN_FIELD_STR)) + strcpy(dst_buf, "firstinfield"); + else if (!strcmp(str, CHR_UNKNOWN)) + strcpy(dst_buf, "unknown"); + else if (!strcmp(str, CHR_SPACE)) + strcpy(dst_buf, "space"); + + for (i = 0; ireg->zei, ord, &index_type, - 0/* db */, &string_index); - assert(index_type); - zebra_term_untrans_iconv(zh, nmem, index_type, - &dst_term, str); - *keystr = '\0'; - for (i = 0; i= sizeof(ord_buf)-1) + { + if (verbose_level >= 1) + { + /* so bad it can not fit into our ord_buf */ + yaz_log(YLOG_WARN, "Record " ZINT_FORMAT + ": long dictionary entry %d + %d", + rec->sysno, ord_len, (int) slen); + } + ++no_long_dict_entries; + continue; + } memcpy(ord_buf + ord_len, str, slen); ord_buf[ord_len + slen] = '\0'; if (ord_len + slen >= IT_MAX_WORD) + { + if (verbose_level >= 1) + { + do_log = 1; + yaz_log(YLOG_WARN, "Record " ZINT_FORMAT + ": long dictionary entry %d + %d", + rec->sysno, (int) ord_len, (int) slen); + } ++no_long_dict_entries; - else + } + info = dict_lookup(zh->reg->dict, ord_buf); + if (!info) { - char *info = dict_lookup(zh->reg->dict, ord_buf); - if (!info) - no_failed_dict_lookup++; - else + if (verbose_level >= 1) { - ; + do_log = 1; + yaz_log(YLOG_WARN, "Record " ZINT_FORMAT + ": term do not exist in dictionary", rec->sysno); } + no_failed_dict_lookup++; + } + if (key_in.len < 2 || key_in.len > 4) + { + if (verbose_level >= 1) + { + yaz_log(YLOG_WARN, "Record " ZINT_FORMAT + ": unexpected key length %d", + rec->sysno, key_in.len); + do_log = 1; + } + no_invalid_keys++; + } + if (do_log) + { + zebra_it_key_str_dump(zh, &key_in, str, + slen, nmem, YLOG_LOG); + nmem_reset(nmem); } - (*no_keys)++; } - if (no_long_dict_entries) - { - yaz_log(YLOG_WARN, "Record id " ZINT_FORMAT - " has %d dictionary entries that are too long", - rec->sysno, no_long_dict_entries); - } - if (no_failed_dict_lookup) - { - yaz_log(YLOG_WARN, "Record id " ZINT_FORMAT - " has %d terms that do not exist in dictionary", - rec->sysno, no_failed_dict_lookup); - } - res = ZEBRA_OK; + if (!no_long_dict_entries && !no_failed_dict_lookup && !no_invalid_keys) + res = ZEBRA_OK; + nmem_destroy(nmem); } zebra_rec_keys_close(keys); return res; } -ZEBRA_RES zebra_register_check(ZebraHandle zh) +ZEBRA_RES zebra_register_check(ZebraHandle zh, int verbose_level) { ZEBRA_RES res = ZEBRA_FAIL; if (zebra_begin_read(zh) == ZEBRA_OK) { - zint no_records = 0; + zint no_records_total = 0; + zint no_records_fail = 0; zint total_keys = 0; if (zh->reg) { Record rec = rec_get_root(zh->reg->records); + res = ZEBRA_OK; while (rec) { Record r1; zint no_keys; - zebra_record_check(zh, rec, &no_keys); + if (zebra_record_check(zh, rec, &no_keys, verbose_level) + != ZEBRA_OK) + { + res = ZEBRA_FAIL; + no_records_fail++; + } + r1 = rec_get_next(zh->reg->records, rec); rec_free(&rec); rec = r1; - no_records++; + no_records_total++; total_keys += no_keys; } - res = ZEBRA_OK; - yaz_log(YLOG_LOG, "records: " ZINT_FORMAT, no_records); + yaz_log(YLOG_LOG, "records total: " ZINT_FORMAT, no_records_total); + yaz_log(YLOG_LOG, "records fail: " ZINT_FORMAT, no_records_fail); yaz_log(YLOG_LOG, "keys: " ZINT_FORMAT, total_keys); } zebra_end_read(zh); diff --git a/index/zebraidx.c b/index/zebraidx.c index c411901..a9efb40 100644 --- a/index/zebraidx.c +++ b/index/zebraidx.c @@ -177,9 +177,14 @@ int main(int argc, char **argv) { show_filters(zs); } - else if (!strcmp(arg, "check")) + else if (!strcmp(arg, "check0")) { - if (zebra_register_check(zh) != ZEBRA_OK) + if (zebra_register_check(zh, 0) != ZEBRA_OK) + yaz_log(YLOG_WARN, "register check failed"); + } + else if (!strcmp(arg, "check1") || !strcmp(arg, "check")) + { + if (zebra_register_check(zh, 1) != ZEBRA_OK) yaz_log(YLOG_WARN, "register check failed"); } else -- 1.7.10.4