X-Git-Url: http://lists.indexdata.dk/cgi-bin?a=blobdiff_plain;f=src%2Fcharsets.c;h=44ef5fc3d1d69d600d5d9499c0bd40ef5c692a5f;hb=2de4cab9b87f848767078447142668fc3c30e5c9;hp=7bbe102e858788409e5f6d1b35ec70beb6d5145c;hpb=6ff7cb53029747ad6ac60fde903630ea063b5218;p=pazpar2-moved-to-github.git diff --git a/src/charsets.c b/src/charsets.c index 7bbe102..44ef5fc 100644 --- a/src/charsets.c +++ b/src/charsets.c @@ -44,6 +44,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA struct pp2_charset_s { const char *(*token_next_handler)(pp2_relevance_token_t prt); const char *(*get_sort_handler)(pp2_relevance_token_t prt); + const char *(*get_display_handler)(pp2_relevance_token_t prt); int ref_count; #if YAZ_HAVE_ICU struct icu_chain * icu_chn; @@ -54,10 +55,12 @@ struct pp2_charset_s { static const char *pp2_relevance_token_null(pp2_relevance_token_t prt); static const char *pp2_relevance_token_a_to_z(pp2_relevance_token_t prt); static const char *pp2_get_sort_ascii(pp2_relevance_token_t prt); +static const char *pp2_get_display_ascii(pp2_relevance_token_t prt); #if YAZ_HAVE_ICU static const char *pp2_relevance_token_icu(pp2_relevance_token_t prt); static const char *pp2_get_sort_icu(pp2_relevance_token_t prt); +static const char *pp2_get_display_icu(pp2_relevance_token_t prt); #endif /* tokenzier handle */ @@ -123,6 +126,7 @@ pp2_charset_t pp2_charset_create(struct icu_chain *icu_chn) pct->token_next_handler = pp2_relevance_token_null; pct->get_sort_handler = pp2_get_sort_ascii; + pct->get_display_handler = pp2_get_display_ascii; pct->ref_count = 1; #if YAZ_HAVE_ICU pct->icu_chn = 0; @@ -132,6 +136,7 @@ pp2_charset_t pp2_charset_create(struct icu_chain *icu_chn) pct->icu_sts = U_ZERO_ERROR; pct->token_next_handler = pp2_relevance_token_icu; pct->get_sort_handler = pp2_get_sort_icu; + pct->get_display_handler = pp2_get_display_icu; } #endif // YAZ_HAVE_ICU return pct; @@ -184,8 +189,6 @@ void pp2_relevance_first(pp2_relevance_token_t prt, char *pout = firstword; char articles[] = "the den der die des an a "; // must end in space - while (*p && !isalnum(*(unsigned char *)p)) - p++; for (; *p && *p != ' ' && pout - firstword < (sizeof(firstword)-2); p++) *pout++ = tolower(*(unsigned char *)p); *pout++ = ' '; @@ -232,6 +235,11 @@ const char *pp2_get_sort(pp2_relevance_token_t prt) return prt->pct->get_sort_handler(prt); } +const char *pp2_get_display(pp2_relevance_token_t prt) +{ + return prt->pct->get_display_handler(prt); +} + #define raw_char(c) (((c) >= 'a' && (c) <= 'z') ? (c) : -1) /* original tokenizer with our tokenize interface, but we add +1 to ensure no '\0' are in our string (except for EOF) @@ -280,6 +288,16 @@ static const char *pp2_get_sort_ascii(pp2_relevance_token_t prt) } } +static const char *pp2_get_display_ascii(pp2_relevance_token_t prt) +{ + if (prt->last_cp == 0) + return 0; + else + { + return wrbuf_cstr(prt->norm_str); + } +} + static const char *pp2_relevance_token_null(pp2_relevance_token_t prt) { const char *cp = prt->cp; @@ -306,6 +324,11 @@ static const char *pp2_get_sort_icu(pp2_relevance_token_t prt) return icu_iter_get_sortkey(prt->iter); } +static const char *pp2_get_display_icu(pp2_relevance_token_t prt) +{ + return icu_iter_get_display(prt->iter); +} + #endif // YAZ_HAVE_ICU