1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2008 Index Data
3 * See the file LICENSE for details.
17 #include <yaz/timing.h>
21 #include <yaz/xmalloc.h>
23 #include <yaz/icu_I18N.h>
31 #include <unicode/ustring.h> /* some more string fcns*/
32 #include <unicode/uchar.h> /* char names */
35 #include <unicode/ucol.h>
/* Report an ICU status code: when `status` is a failure code, a warning
   containing the numeric code and its u_errorName() text is written with
   yaz_log(). The value returned to the caller is not visible in this
   excerpt. */
38 int icu_check_status (UErrorCode status)
40 if (U_FAILURE(status))
42 yaz_log(YLOG_WARN, "ICU: %d %s\n", status, u_errorName(status));
/* Allocate a UTF-16 buffer object with xmalloc(), give it an array of
   `capacity` UChars, store a zero UChar in the first element and record
   the capacity.
   NOTE(review): other functions in this file call this with capacity 0;
   confirm that the array allocation and the write to utf16[0] below are
   guarded for that case (any guard lies outside this excerpt). */
51 struct icu_buf_utf16 * icu_buf_utf16_create(size_t capacity)
53 struct icu_buf_utf16 * buf16
54 = (struct icu_buf_utf16 *) xmalloc(sizeof(struct icu_buf_utf16));
61 buf16->utf16 = (UChar *) xmalloc(sizeof(UChar) * capacity);
62 buf16->utf16[0] = (UChar) 0;
63 buf16->utf16_cap = capacity;
/* Store a zero UChar in the first element of buf16's array. Any further
   resets and the returned pointer are not visible in this excerpt. */
68 struct icu_buf_utf16 * icu_buf_utf16_clear(struct icu_buf_utf16 * buf16)
72 buf16->utf16[0] = (UChar) 0;
/* Change the storage of buf16 to `capacity` UChars. When no array exists
   yet it is obtained with xmalloc(); an existing array goes through
   xrealloc(). Afterwards the buffer is handed to icu_buf_utf16_clear()
   and the new capacity is recorded, so callers should not rely on the
   previous content surviving a resize. */
78 struct icu_buf_utf16 * icu_buf_utf16_resize(struct icu_buf_utf16 * buf16,
85 if (0 == buf16->utf16)
86 buf16->utf16 = (UChar *) xmalloc(sizeof(UChar) * capacity);
89 = (UChar *) xrealloc(buf16->utf16, sizeof(UChar) * capacity);
91 icu_buf_utf16_clear(buf16);
92 buf16->utf16_cap = capacity;
/* Copy the content of src16 into dest16. If dest16's capacity is smaller
   than the source length it is resized to twice that length first. Then
   src16->utf16_len code units are copied with u_strncpy() and the
   destination length is set to match.
   NOTE(review): when utf16_cap equals utf16_len no resize happens, so
   there is no spare element for a terminating zero - confirm callers do
   not depend on one. */
105 struct icu_buf_utf16 * icu_buf_utf16_copy(struct icu_buf_utf16 * dest16,
106 struct icu_buf_utf16 * src16)
112 if (dest16->utf16_cap < src16->utf16_len)
113 icu_buf_utf16_resize(dest16, src16->utf16_len * 2);
115 u_strncpy(dest16->utf16, src16->utf16, src16->utf16_len);
116 dest16->utf16_len = src16->utf16_len;
122 void icu_buf_utf16_destroy(struct icu_buf_utf16 * buf16)
/* Allocate a UTF-8 buffer object with xmalloc(), give it an array of
   `capacity` bytes, store a zero byte in the first element and record
   the capacity.
   NOTE(review): other functions in this file call this with capacity 0;
   confirm that the array allocation and the write to utf8[0] below are
   guarded for that case (any guard lies outside this excerpt). */
131 struct icu_buf_utf8 * icu_buf_utf8_create(size_t capacity)
133 struct icu_buf_utf8 * buf8
134 = (struct icu_buf_utf8 *) xmalloc(sizeof(struct icu_buf_utf8));
141 buf8->utf8 = (uint8_t *) xmalloc(sizeof(uint8_t) * capacity);
142 buf8->utf8[0] = (uint8_t) 0;
143 buf8->utf8_cap = capacity;
/* Store a zero byte in the first element of buf8's array. Any further
   resets and the returned pointer are not visible in this excerpt. */
149 struct icu_buf_utf8 * icu_buf_utf8_clear(struct icu_buf_utf8 * buf8)
153 buf8->utf8[0] = (uint8_t) 0;
/* Change the storage of buf8 to `capacity` bytes, using xmalloc() or
   xrealloc() (the condition selecting between them is not visible in
   this excerpt), and record the new capacity. */
160 struct icu_buf_utf8 * icu_buf_utf8_resize(struct icu_buf_utf8 * buf8,
168 buf8->utf8 = (uint8_t *) xmalloc(sizeof(uint8_t) * capacity);
171 = (uint8_t *) xrealloc(buf8->utf8, sizeof(uint8_t) * capacity);
173 buf8->utf8_cap = capacity;
/* Expose the content of src8 as a zero-terminated C string. A null or
   empty buffer is handled separately (the value returned for it is not
   visible in this excerpt). When the content fills the buffer exactly,
   the buffer is first resized to 2 * utf8_len + 1 bytes to make room for
   the terminator; the terminator is then written at utf8[utf8_len]. The
   returned pointer refers to the buffer's own storage. */
186 const char *icu_buf_utf8_to_cstr(struct icu_buf_utf8 *src8)
188 if (!src8 || src8->utf8_len == 0)
191 if (src8->utf8_len == src8->utf8_cap)
192 src8 = icu_buf_utf8_resize(src8, src8->utf8_len * 2 + 1);
194 src8->utf8[src8->utf8_len] = '\0';
196 return (const char *) src8->utf8;
200 void icu_buf_utf8_destroy(struct icu_buf_utf8 * buf8)
/* Convert the UTF-8 content of src8 into dest16 with u_strFromUTF8().
   If the first attempt reports U_BUFFER_OVERFLOW_ERROR, dest16 is resized
   to twice the length held in utf16_len, *status is reset to
   U_ZERO_ERROR and the conversion is repeated once. On success, and only
   if the resulting length fits the capacity, dest16->utf16_len is
   updated; otherwise dest16 is cleared. The returned UErrorCode value is
   not visible in this excerpt. */
209 UErrorCode icu_utf16_from_utf8(struct icu_buf_utf16 * dest16,
210 struct icu_buf_utf8 * src8,
213 int32_t utf16_len = 0;
215 u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
217 (const char *) src8->utf8, src8->utf8_len, status);
219 /* check for buffer overflow, resize and retry */
220 if (*status == U_BUFFER_OVERFLOW_ERROR)
222 icu_buf_utf16_resize(dest16, utf16_len * 2);
223 *status = U_ZERO_ERROR;
224 u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
226 (const char *) src8->utf8, src8->utf8_len, status);
229 if (U_SUCCESS(*status)
230 && utf16_len <= dest16->utf16_cap)
231 dest16->utf16_len = utf16_len;
233 icu_buf_utf16_clear(dest16);
/* Convert the zero-terminated UTF-8 string src8cstr into dest16.
   *status is reset to U_ZERO_ERROR on entry and the input length is taken
   with strlen(). Conversion uses u_strFromUTF8(); on
   U_BUFFER_OVERFLOW_ERROR dest16 is resized to twice the length held in
   utf16_len, *status is reset and the conversion is repeated once. On
   success, and only if the result fits the capacity, dest16->utf16_len
   is updated; otherwise dest16 is cleared. The returned UErrorCode value
   is not visible in this excerpt.
   NOTE(review): no NULL check on src8cstr is visible before strlen() -
   confirm callers never pass NULL. */
240 UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 * dest16,
241 const char * src8cstr,
244 size_t src8cstr_len = 0;
245 int32_t utf16_len = 0;
247 *status = U_ZERO_ERROR;
248 src8cstr_len = strlen(src8cstr);
250 u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
252 src8cstr, src8cstr_len, status);
254 /* check for buffer overflow, resize and retry */
255 if (*status == U_BUFFER_OVERFLOW_ERROR)
257 icu_buf_utf16_resize(dest16, utf16_len * 2);
258 *status = U_ZERO_ERROR;
259 u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
261 src8cstr, src8cstr_len, status);
264 if (U_SUCCESS(*status)
265 && utf16_len <= dest16->utf16_cap)
266 dest16->utf16_len = utf16_len;
268 icu_buf_utf16_clear(dest16);
/* Convert the UTF-16 content of src16 into dest8 with u_strToUTF8().
   If the first attempt reports U_BUFFER_OVERFLOW_ERROR, dest8 is resized
   to twice the length held in utf8_len, *status is reset to U_ZERO_ERROR
   and the conversion is repeated once. On success, and only if the
   resulting length fits the capacity, dest8->utf8_len is updated;
   otherwise dest8 is cleared. The returned UErrorCode value is not
   visible in this excerpt. */
276 UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 * dest8,
277 struct icu_buf_utf16 * src16,
280 int32_t utf8_len = 0;
282 u_strToUTF8((char *) dest8->utf8, dest8->utf8_cap,
284 src16->utf16, src16->utf16_len, status);
286 /* check for buffer overflow, resize and retry */
287 if (*status == U_BUFFER_OVERFLOW_ERROR)
289 icu_buf_utf8_resize(dest8, utf8_len * 2);
290 *status = U_ZERO_ERROR;
291 u_strToUTF8((char *) dest8->utf8, dest8->utf8_cap,
293 src16->utf16, src16->utf16_len, status);
297 if (U_SUCCESS(*status)
298 && utf8_len <= dest8->utf8_cap)
299 dest8->utf8_len = utf8_len;
301 icu_buf_utf8_clear(dest8);
/* Allocate a case-mapping object with xmalloc() and store the requested
   `action` character in it. The action is then validated in a switch
   whose case labels are not visible in this excerpt; one branch releases
   the object again through icu_casemap_destroy() (presumably the
   unsupported-action branch - confirm). */
308 struct icu_casemap * icu_casemap_create(char action, UErrorCode *status)
310 struct icu_casemap * casemap
311 = (struct icu_casemap *) xmalloc(sizeof(struct icu_casemap));
312 casemap->action = action;
314 switch(casemap->action) {
325 icu_casemap_destroy(casemap);
332 void icu_casemap_destroy(struct icu_casemap * casemap)
/* Apply the case mapping configured in `casemap` to src16, writing into
   dest16. This is a thin wrapper that forwards to icu_utf16_casemap()
   with casemap->action and returns its result. */
338 int icu_casemap_casemap(struct icu_casemap * casemap,
339 struct icu_buf_utf16 * dest16,
340 struct icu_buf_utf16 * src16,
347 return icu_utf16_casemap(dest16, src16, locale,
348 casemap->action, status);
/* Case-map src16 into dest16 according to `action`.
   - An empty source short-circuits: dest16 is set to the empty string.
   - Depending on `action` (the case labels are not visible in this
     excerpt) one of u_strToLower(), u_strToUpper(), u_strToTitle() or
     u_strFoldCase() (with U_FOLD_CASE_DEFAULT) is called; any other
     action returns U_UNSUPPORTED_ERROR.
   - If ICU reports U_BUFFER_OVERFLOW_ERROR and dest16 is a different
     buffer than src16, dest16 is resized to twice the reported length,
     *status is reset and the same ICU call is repeated once. In-place
     conversion (dest16 == src16) is never resized.
   - Finally, on success and if the result fits the capacity,
     dest16->utf16_len is updated; otherwise dest16 is emptied. */
352 int icu_utf16_casemap(struct icu_buf_utf16 * dest16,
353 struct icu_buf_utf16 * src16,
354 const char *locale, char action,
357 int32_t dest16_len = 0;
360 if (!src16->utf16_len){ /* guarding for empty source string */
362 dest16->utf16[0] = (UChar) 0;
363 dest16->utf16_len = 0;
371 dest16_len = u_strToLower(dest16->utf16, dest16->utf16_cap,
372 src16->utf16, src16->utf16_len,
377 dest16_len = u_strToUpper(dest16->utf16, dest16->utf16_cap,
378 src16->utf16, src16->utf16_len,
383 dest16_len = u_strToTitle(dest16->utf16, dest16->utf16_cap,
384 src16->utf16, src16->utf16_len,
389 dest16_len = u_strFoldCase(dest16->utf16, dest16->utf16_cap,
390 src16->utf16, src16->utf16_len,
391 U_FOLD_CASE_DEFAULT, status);
395 return U_UNSUPPORTED_ERROR;
399 /* check for buffer overflow, resize and retry */
400 if (*status == U_BUFFER_OVERFLOW_ERROR
401 && dest16 != src16 /* do not resize if in-place conversion */
403 icu_buf_utf16_resize(dest16, dest16_len * 2);
404 *status = U_ZERO_ERROR;
410 dest16_len = u_strToLower(dest16->utf16, dest16->utf16_cap,
411 src16->utf16, src16->utf16_len,
416 dest16_len = u_strToUpper(dest16->utf16, dest16->utf16_cap,
417 src16->utf16, src16->utf16_len,
422 dest16_len = u_strToTitle(dest16->utf16, dest16->utf16_cap,
423 src16->utf16, src16->utf16_len,
428 dest16_len = u_strFoldCase(dest16->utf16, dest16->utf16_cap,
429 src16->utf16, src16->utf16_len,
430 U_FOLD_CASE_DEFAULT, status);
434 return U_UNSUPPORTED_ERROR;
439 if (U_SUCCESS(*status)
440 && dest16_len <= dest16->utf16_cap)
441 dest16->utf16_len = dest16_len;
444 dest16->utf16[0] = (UChar) 0;
445 dest16->utf16_len = 0;
/* Compute the collation sort key of src16 with collator `coll` and store
   it in dest8 using ucol_getSortKey(). If the reported key length exceeds
   dest8's capacity, dest8 is resized to twice that length and the key is
   computed again. When *status indicates success (a further condition on
   the following original line is not visible in this excerpt) the key
   length is stored in dest8->utf8_len; otherwise dest8 is cleared. */
453 void icu_sortkey8_from_utf16(UCollator *coll,
454 struct icu_buf_utf8 * dest8,
455 struct icu_buf_utf16 * src16,
459 int32_t sortkey_len = 0;
461 sortkey_len = ucol_getSortKey(coll, src16->utf16, src16->utf16_len,
462 dest8->utf8, dest8->utf8_cap);
464 /* check for buffer overflow, resize and retry */
465 if (sortkey_len > dest8->utf8_cap) {
466 icu_buf_utf8_resize(dest8, sortkey_len * 2);
467 sortkey_len = ucol_getSortKey(coll, src16->utf16, src16->utf16_len,
468 dest8->utf8, dest8->utf8_cap);
471 if (U_SUCCESS(*status)
473 dest8->utf8_len = sortkey_len;
475 icu_buf_utf8_clear(dest8);
/* Allocate a tokenizer for `locale`. The object is obtained with
   xmalloc(), `action` is stored, and the attached buffer and all token
   bookkeeping fields start at zero. Depending on `action` (the case
   labels are not visible in this excerpt) an ICU break iterator of type
   UBRK_LINE, UBRK_SENTENCE, UBRK_WORD, UBRK_CHARACTER or UBRK_TITLE is
   opened with ubrk_open(); any other action sets *status to
   U_UNSUPPORTED_ERROR. If *status does not indicate success the tokenizer
   is released again with icu_tokenizer_destroy(). */
480 struct icu_tokenizer * icu_tokenizer_create(const char *locale, char action,
483 struct icu_tokenizer * tokenizer
484 = (struct icu_tokenizer *) xmalloc(sizeof(struct icu_tokenizer));
486 tokenizer->action = action;
488 tokenizer->buf16 = 0;
489 tokenizer->token_count = 0;
490 tokenizer->token_id = 0;
491 tokenizer->token_start = 0;
492 tokenizer->token_end = 0;
495 switch(tokenizer->action) {
498 tokenizer->bi = ubrk_open(UBRK_LINE, locale, 0, 0, status);
502 tokenizer->bi = ubrk_open(UBRK_SENTENCE, locale, 0, 0, status);
506 tokenizer->bi = ubrk_open(UBRK_WORD, locale, 0, 0, status);
510 tokenizer->bi = ubrk_open(UBRK_CHARACTER, locale, 0, 0, status);
514 tokenizer->bi = ubrk_open(UBRK_TITLE, locale, 0, 0, status);
517 *status = U_UNSUPPORTED_ERROR;
522 /* ICU error stuff is a very funny business */
523 if (U_SUCCESS(*status))
526 /* freeing if failed */
527 icu_tokenizer_destroy(tokenizer);
/* Release a tokenizer: its ICU break iterator is closed with
   ubrk_close(). Guards and the release of the object itself are not
   visible in this excerpt. */
531 void icu_tokenizer_destroy(struct icu_tokenizer * tokenizer)
535 ubrk_close(tokenizer->bi);
/* Attach the text in src16 to the tokenizer. A missing tokenizer, break
   iterator or source buffer is rejected up front. Otherwise the tokenizer
   keeps a pointer to src16 (the text is not copied), all token
   bookkeeping fields are reset to zero, and the text is handed to the
   break iterator with ubrk_setText(). A failure status from ICU is
   checked afterwards; the returned values are not visible in this
   excerpt. */
540 int icu_tokenizer_attach(struct icu_tokenizer * tokenizer,
541 struct icu_buf_utf16 * src16,
544 if (!tokenizer || !tokenizer->bi || !src16)
548 tokenizer->buf16 = src16;
549 tokenizer->token_count = 0;
550 tokenizer->token_id = 0;
551 tokenizer->token_start = 0;
552 tokenizer->token_end = 0;
554 ubrk_setText(tokenizer->bi, src16->utf16, src16->utf16_len, status);
557 if (U_FAILURE(*status))
/* Advance the tokenizer to the next token of the attached text and copy
   that token into tkn16. The call is rejected when the tokenizer, its
   break iterator or its attached buffer is missing, or when the attached
   text is empty. The token starts at ubrk_first() on the first call
   (detected by token_end == 0) and at the previous token end afterwards;
   it ends at ubrk_next(), with UBRK_DONE mapped to the end of the text.
   After a status check the token bookkeeping fields are updated, tkn16
   is grown to twice the token length if it is too small, and the token
   is copied with u_strncpy(). */
563 int32_t icu_tokenizer_next_token(struct icu_tokenizer * tokenizer,
564 struct icu_buf_utf16 * tkn16,
567 int32_t tkn_start = 0;
572 if (!tokenizer || !tokenizer->bi
573 || !tokenizer->buf16 || !tokenizer->buf16->utf16_len)
577 never change tokenizer->buf16 and keep always invariant
578 0 <= tokenizer->token_start
579 <= tokenizer->token_end
580 <= tokenizer->buf16->utf16_len
581 returns length of token
584 if (0 == tokenizer->token_end) /* first call */
585 tkn_start = ubrk_first(tokenizer->bi);
586 else /* successive calls */
587 tkn_start = tokenizer->token_end;
589 /* get next position */
590 tkn_end = ubrk_next(tokenizer->bi);
592 /* repairing invariant at end of ubrk, which is UBRK_DONE = -1 */
593 if (UBRK_DONE == tkn_end)
594 tkn_end = tokenizer->buf16->utf16_len;
596 /* copy out if everything is well */
597 if(U_FAILURE(*status))
600 /* everything OK, now update internal state */
601 tkn_len = tkn_end - tkn_start;
604 tokenizer->token_count++;
605 tokenizer->token_id++;
607 tokenizer->token_id = 0;
609 tokenizer->token_start = tkn_start;
610 tokenizer->token_end = tkn_end;
613 /* copying into token buffer if it exists */
615 if (tkn16->utf16_cap < tkn_len)
616 icu_buf_utf16_resize(tkn16, (size_t) tkn_len * 2);
618 u_strncpy(tkn16->utf16, &(tokenizer->buf16->utf16)[tkn_start],
621 tkn16->utf16_len = tkn_len;
/* Accessor: returns the tokenizer's current token_id field. */
628 int32_t icu_tokenizer_token_id(struct icu_tokenizer * tokenizer)
630 return tokenizer->token_id;
/* Accessor: returns the start offset (token_start) of the current token
   within the attached UTF-16 text. */
633 int32_t icu_tokenizer_token_start(struct icu_tokenizer * tokenizer)
635 return tokenizer->token_start;
/* Accessor: returns the end offset (token_end) of the current token
   within the attached UTF-16 text. */
638 int32_t icu_tokenizer_token_end(struct icu_tokenizer * tokenizer)
640 return tokenizer->token_end;
/* Accessor: returns the length of the current token, computed as
   token_end - token_start. */
643 int32_t icu_tokenizer_token_length(struct icu_tokenizer * tokenizer)
645 return (tokenizer->token_end - tokenizer->token_start);
/* Accessor: returns the tokenizer's token_count field. */
648 int32_t icu_tokenizer_token_count(struct icu_tokenizer * tokenizer)
650 return tokenizer->token_count;
/* Allocate a normalizer from the transliteration `rules` string. The
   object is obtained with xmalloc(), `action` is stored, and the rules
   are converted from the UTF-8 C string into a freshly created UTF-16
   buffer (rules16). Depending on `action` (the case labels are not
   visible in this excerpt) a transliterator is opened with
   utrans_openU() on that UTF-16 text, with parse errors reported into
   normalizer->parse_error; any other action sets *status to
   U_UNSUPPORTED_ERROR. If *status does not indicate success the
   normalizer is released again with icu_normalizer_destroy(). */
655 struct icu_normalizer * icu_normalizer_create(const char *rules, char action,
659 struct icu_normalizer * normalizer
660 = (struct icu_normalizer *) xmalloc(sizeof(struct icu_normalizer));
662 normalizer->action = action;
663 normalizer->trans = 0;
664 normalizer->rules16 = icu_buf_utf16_create(0);
665 icu_utf16_from_utf8_cstr(normalizer->rules16, rules, status);
667 switch(normalizer->action) {
671 = utrans_openU(normalizer->rules16->utf16,
672 normalizer->rules16->utf16_len,
675 &normalizer->parse_error, status);
680 = utrans_openU(normalizer->rules16->utf16,
681 normalizer->rules16->utf16_len,
684 &normalizer->parse_error, status);
687 *status = U_UNSUPPORTED_ERROR;
692 if (U_SUCCESS(*status))
695 /* freeing if failed */
696 icu_normalizer_destroy(normalizer);
/* Release a normalizer: the UTF-16 rules buffer is destroyed and the
   transliterator is closed with utrans_close(), each only if it is set.
   The release of the object itself is not visible in this excerpt. */
701 void icu_normalizer_destroy(struct icu_normalizer * normalizer){
703 if (normalizer->rules16)
704 icu_buf_utf16_destroy(normalizer->rules16);
705 if (normalizer->trans)
706 utrans_close(normalizer->trans);
/* Run the normalizer's transliterator over src16 and leave the result in
   dest16. Missing arguments are rejected up front (the full condition is
   only partly visible in this excerpt). An empty source clears dest16.
   Otherwise src16 is copied into dest16 and utrans_transUChars()
   transforms dest16 in place. On an ICU failure dest16 is cleared. The
   function returns dest16->utf16_len.
   NOTE(review): a pointer to src16->utf16_len is passed to
   utrans_transUChars(), so ICU is able to write to the source buffer's
   length field - confirm this is intended. */
713 int icu_normalizer_normalize(struct icu_normalizer * normalizer,
714 struct icu_buf_utf16 * dest16,
715 struct icu_buf_utf16 * src16,
718 if (!normalizer || !normalizer->trans
723 if (!src16->utf16_len){ /* guarding for empty source string */
724 icu_buf_utf16_clear(dest16);
728 if (!icu_buf_utf16_copy(dest16, src16))
732 utrans_transUChars (normalizer->trans,
733 dest16->utf16, &(dest16->utf16_len),
735 0, &(src16->utf16_len), status);
737 if (U_FAILURE(*status))
738 icu_buf_utf16_clear(dest16);
740 return dest16->utf16_len;
/* Create one processing step of the given `type` for `chain`. A missing
   chain, a zero type or a missing rule is rejected up front. The step is
   allocated with xmalloc() and its type-specific helper object is then
   created:
   - display:   no helper object is visible for this type;
   - casemap:   icu_casemap_create() with the first byte of `rule`;
   - normalize: icu_normalizer_create() with the whole `rule` string and
                action 'f';
   - tokenize:  icu_tokenizer_create() with the chain's locale and the
                first byte of `rule`. */
746 struct icu_chain_step * icu_chain_step_create(struct icu_chain * chain,
747 enum icu_chain_step_type type,
748 const uint8_t * rule,
749 struct icu_buf_utf16 * buf16,
752 struct icu_chain_step * step = 0;
754 if(!chain || !type || !rule)
757 step = (struct icu_chain_step *) xmalloc(sizeof(struct icu_chain_step));
763 /* create auxiliary objects */
765 case ICU_chain_step_type_display:
767 case ICU_chain_step_type_casemap:
768 step->u.casemap = icu_casemap_create(rule[0], status);
770 case ICU_chain_step_type_normalize:
771 step->u.normalizer = icu_normalizer_create((char *) rule, 'f', status);
773 case ICU_chain_step_type_tokenize:
774 step->u.tokenizer = icu_tokenizer_create((char *) chain->locale,
775 (char) rule[0], status);
/* Release a step and, recursively, every step linked through
   step->previous. For casemap, normalize and tokenize steps the
   type-specific helper object and the step's UTF-16 buffer are destroyed;
   nothing is visibly released for a display step. */
785 void icu_chain_step_destroy(struct icu_chain_step * step){
790 icu_chain_step_destroy(step->previous);
793 case ICU_chain_step_type_display:
795 case ICU_chain_step_type_casemap:
796 icu_casemap_destroy(step->u.casemap);
797 icu_buf_utf16_destroy(step->buf16);
799 case ICU_chain_step_type_normalize:
800 icu_normalizer_destroy(step->u.normalizer);
801 icu_buf_utf16_destroy(step->buf16);
803 case ICU_chain_step_type_tokenize:
804 icu_tokenizer_destroy(step->u.tokenizer);
805 icu_buf_utf16_destroy(step->buf16);
/* Create an empty processing chain for `locale`. *status is reset to
   U_ZERO_ERROR, the chain is allocated with xmalloc(), the locale string
   is duplicated with xstrdup() and a collator for it is opened with
   ucol_open(). A collator failure is checked (its handling is not visible
   in this excerpt). The token count starts at zero and the display,
   norm and sort UTF-8 buffers plus the UTF-16 source buffer are created
   with capacity 0. */
815 struct icu_chain * icu_chain_create(const char *locale, int sort,
818 struct icu_chain * chain
819 = (struct icu_chain *) xmalloc(sizeof(struct icu_chain));
821 *status = U_ZERO_ERROR;
823 chain->locale = xstrdup(locale);
827 chain->coll = ucol_open((const char *) chain->locale, status);
829 if (U_FAILURE(*status))
832 chain->token_count = 0;
836 chain->display8 = icu_buf_utf8_create(0);
837 chain->norm8 = icu_buf_utf8_create(0);
838 chain->sort8 = icu_buf_utf8_create(0);
840 chain->src16 = icu_buf_utf16_create(0);
/* Release everything a chain owns: the collator (ucol_close), the three
   UTF-8 output buffers, the UTF-16 source buffer, the linked list of
   steps (icu_chain_step_destroy) and the duplicated locale string
   (xfree). Guards and the release of the chain object itself are not
   visible in this excerpt. */
848 void icu_chain_destroy(struct icu_chain * chain)
853 ucol_close(chain->coll);
855 icu_buf_utf8_destroy(chain->display8);
856 icu_buf_utf8_destroy(chain->norm8);
857 icu_buf_utf8_destroy(chain->sort8);
859 icu_buf_utf16_destroy(chain->src16);
861 icu_chain_step_destroy(chain->steps);
862 xfree(chain->locale);
/* Build a chain from an XML configuration element. *status is reset to
   U_ZERO_ERROR. The node must be an element node; its "locale" attribute
   is passed to icu_chain_create(). Each child element then adds one step,
   selected by the child's name and configured by its "rule" attribute:
     casemap   -> ICU_chain_step_type_casemap
     transform -> ICU_chain_step_type_normalize
     tokenize  -> ICU_chain_step_type_tokenize
     display   -> ICU_chain_step_type_display (always with an empty rule)
   Non-element children are skipped. If a step could not be created or
   *status reports a failure, the chain is destroyed.
   NOTE(review): no release of the strings obtained from xmlGetProp()
   (xml_locale, xml_rule) is visible in this excerpt - confirm they are
   freed with xmlFree(). */
869 struct icu_chain * icu_chain_xml_config(const xmlNode *xml_node,
874 struct icu_chain * chain = 0;
876 *status = U_ZERO_ERROR;
878 if (!xml_node ||xml_node->type != XML_ELEMENT_NODE)
882 xmlChar * xml_locale = xmlGetProp((xmlNode *) xml_node,
883 (xmlChar *) "locale");
887 chain = icu_chain_create((const char *) xml_locale, sort, status);
895 for (node = xml_node->children; node; node = node->next)
898 struct icu_chain_step * step = 0;
900 if (node->type != XML_ELEMENT_NODE)
903 xml_rule = xmlGetProp(node, (xmlChar *) "rule");
905 if (!strcmp((const char *) node->name, "casemap"))
906 step = icu_chain_insert_step(chain, ICU_chain_step_type_casemap,
907 (const uint8_t *) xml_rule, status);
908 else if (!strcmp((const char *) node->name, "transform"))
909 step = icu_chain_insert_step(chain, ICU_chain_step_type_normalize,
910 (const uint8_t *) xml_rule, status);
911 else if (!strcmp((const char *) node->name, "tokenize"))
912 step = icu_chain_insert_step(chain, ICU_chain_step_type_tokenize,
913 (const uint8_t *) xml_rule, status);
914 else if (!strcmp((const char *) node->name, "display"))
915 step = icu_chain_insert_step(chain, ICU_chain_step_type_display,
916 (const uint8_t *) "", status);
918 if (!step || U_FAILURE(*status))
920 icu_chain_destroy(chain);
/* Create a new step of `type` configured by `rule` and link it into
   `chain`. A missing chain, a zero type or a missing rule is rejected up
   front. The input buffer for the new step is the output buffer of the
   chain's current step when there is one, otherwise the chain's own
   src16. Casemap, normalize and tokenize steps each get a fresh UTF-16
   output buffer of capacity 0; no buffer creation is visible for a
   display step. The step is built with icu_chain_step_create() and its
   `previous` link is set to the chain's current step.
   NOTE(review): no NULL check on `step` is visible between its creation
   and the write to step->previous - confirm icu_chain_step_create()
   cannot return NULL on this path. */
931 struct icu_chain_step * icu_chain_insert_step(struct icu_chain * chain,
932 enum icu_chain_step_type type,
933 const uint8_t * rule,
936 struct icu_chain_step * step = 0;
937 struct icu_buf_utf16 * src16 = 0;
938 struct icu_buf_utf16 * buf16 = 0;
940 if (!chain || !type || !rule)
943 /* assign utf16 src buffers as needed */
944 if (chain->steps && chain->steps->buf16)
945 src16 = chain->steps->buf16;
946 else if (chain->src16)
947 src16 = chain->src16;
952 /* create utf16 destination buffers as needed, or */
955 case ICU_chain_step_type_display:
958 case ICU_chain_step_type_casemap:
959 buf16 = icu_buf_utf16_create(0);
961 case ICU_chain_step_type_normalize:
962 buf16 = icu_buf_utf16_create(0);
964 case ICU_chain_step_type_tokenize:
965 buf16 = icu_buf_utf16_create(0);
971 /* create actual chain step with this buffer */
972 step = icu_chain_step_create(chain, type, rule, buf16, status);
974 step->previous = chain->steps;
/* Produce the next token of `step`, pulling input from the step before it
   as required. Nothing is done when the chain, its src16, the step or the
   step's more_tokens flag is missing.
   Input selection: a step with a predecessor reads the predecessor's
   buf16 and, while it needs a new token and the predecessor still has
   tokens, recursively asks the predecessor for one. A step without a
   predecessor reads chain->src16 and clears its more_tokens flag, since
   that input can be consumed only once. If a new token was needed but
   none was obtained, more_tokens is cleared.
   Work per step type: display converts the input to UTF-8 into
   chain->display8; casemap and normalize write into step->buf16;
   tokenize attaches the tokenizer to the input when a new token is
   needed, then extracts the next sub-token into step->buf16, and when
   the tokenizer is exhausted calls this function again on the same step
   to fetch fresh input. */
981 int icu_chain_step_next_token(struct icu_chain * chain,
982 struct icu_chain_step * step,
985 struct icu_buf_utf16 * src16 = 0;
986 int got_new_token = 0;
988 if (!chain || !chain->src16 || !step || !step->more_tokens)
991 /* assign utf16 src buffers as needed, advance in previous steps
992 tokens until non-zero token met, and setting stop condition */
996 src16 = step->previous->buf16;
997 /* tokens might be killed in previous steps, therefore looping */
999 while (step->need_new_token
1000 && step->previous->more_tokens
1003 = icu_chain_step_next_token(chain, step->previous, status);
1006 { /* first step can only work once on chain->src16 input buffer */
1007 src16 = chain->src16;
1008 step->more_tokens = 0;
1015 /* stop if nothing to process */
1016 if (step->need_new_token && !got_new_token)
1018 step->more_tokens = 0;
1022 /* either an old token not finished yet, or a new token, thus
1023 perform the work, eventually put this steps output in
1024 step->buf16 or the chains UTF8 output buffers */
1028 case ICU_chain_step_type_display:
1029 icu_utf16_to_utf8(chain->display8, src16, status);
1031 case ICU_chain_step_type_casemap:
1032 icu_casemap_casemap(step->u.casemap,
1033 step->buf16, src16, status,
1036 case ICU_chain_step_type_normalize:
1037 icu_normalizer_normalize(step->u.normalizer,
1038 step->buf16, src16, status);
1040 case ICU_chain_step_type_tokenize:
1041 /* attach to new src16 token only first time during splitting */
1042 if (step->need_new_token)
1044 icu_tokenizer_attach(step->u.tokenizer, src16, status);
1045 step->need_new_token = 0;
1048 /* splitting one src16 token into multiple buf16 tokens */
1050 = icu_tokenizer_next_token(step->u.tokenizer,
1051 step->buf16, status);
1053 /* make sure to get new previous token if this one had been used up
1054 by recursive call to _same_ step */
1056 if (!step->more_tokens)
1058 step->more_tokens = icu_chain_step_next_token(chain, step, status);
1059 return step->more_tokens; /* avoid one token count too much! */
1067 if (U_FAILURE(*status))
1070 /* if token disappeared into thin air, tell caller */
1071 /* if (!step->buf16->utf16_len && !step->more_tokens) */
/* Give the chain a new UTF-8 input string to process. A missing chain or
   string is rejected up front. The chain stores the pointer itself - the
   string is not copied here, so it has to stay valid while tokens are
   being fetched. The token count is reset, and every step reachable
   through the `previous` links is re-armed (more_tokens = 1,
   need_new_token = 1). If the chain has steps or sorting enabled, the
   input is converted to UTF-16 into chain->src16; a conversion failure
   is checked afterwards (return values are not visible in this
   excerpt). */
1078 int icu_chain_assign_cstr(struct icu_chain * chain,
1079 const char * src8cstr,
1082 struct icu_chain_step * stp = 0;
1084 if (!chain || !src8cstr)
1087 chain->src8cstr = src8cstr;
1091 /* clear token count */
1092 chain->token_count = 0;
1094 /* clear all steps stop states */
1097 stp->more_tokens = 1;
1098 stp->need_new_token = 1;
1099 stp = stp->previous;
1102 /* finally convert UTF8 to UTF16 string if needed */
1103 if (chain->steps || chain->sort)
1104 icu_utf16_from_utf8_cstr(chain->src16, chain->src8cstr, status);
1106 if (U_FAILURE(*status))
/* Advance the chain to its next output token; *status is reset to
   U_ZERO_ERROR first.
   Special case (chain without steps, per the comment below): once a token
   has already been counted nothing more is produced; otherwise the token
   count is incremented, a sort key is generated and the count is
   returned.
   Usual case: icu_chain_step_next_token() is called on the last step
   until it yields a token or the step runs out of tokens. For a token,
   the count is incremented, the step's UTF-16 output is converted to
   UTF-8 into chain->norm8, a sort key is generated into chain->sort8,
   and the count is returned.
   NOTE(review): the sort-key call in the no-steps branch reads
   chain->steps->buf16 although that branch is described as having no
   steps - confirm this is not a NULL dereference (chain->src16 looks like
   the intended input). */
1114 int icu_chain_next_token(struct icu_chain * chain,
1119 *status = U_ZERO_ERROR;
1124 /* special case with no steps - same as index type binary */
1127 if (chain->token_count)
1131 chain->token_count++;
1134 icu_sortkey8_from_utf16(chain->coll,
1135 chain->sort8, chain->steps->buf16,
1137 return chain->token_count;
1140 /* usual case, one or more icu chain steps existing */
1143 while(!got_token && chain->steps && chain->steps->more_tokens)
1144 got_token = icu_chain_step_next_token(chain, chain->steps, status);
1148 chain->token_count++;
1150 icu_utf16_to_utf8(chain->norm8, chain->steps->buf16, status);
1153 icu_sortkey8_from_utf16(chain->coll,
1154 chain->sort8, chain->steps->buf16,
1157 return chain->token_count;
/* Accessor: returns the chain's token_count field. Any guard before the
   return is not visible in this excerpt. */
1164 int icu_chain_token_number(struct icu_chain * chain)
1169 return chain->token_count;
/* Returns the display form of the current token as a C string taken from
   chain->display8, when that buffer exists. The value returned otherwise
   is not visible in this excerpt. */
1173 const char * icu_chain_token_display(struct icu_chain * chain)
1175 if (chain->display8)
1176 return icu_buf_utf8_to_cstr(chain->display8);
/* Returns the normalized form of the current token. One branch returns
   the chain's original input string (src8cstr), the other the content of
   chain->norm8 as a C string; the conditions selecting between them are
   not visible in this excerpt. */
1181 const char * icu_chain_token_norm(struct icu_chain * chain)
1184 return chain->src8cstr;
1187 return icu_buf_utf8_to_cstr(chain->norm8);
/* Returns the sort key of the current token as a C string taken from
   chain->sort8. Any guard before the return is not visible in this
   excerpt. */
1192 const char * icu_chain_token_sortkey(struct icu_chain * chain)
1195 return icu_buf_utf8_to_cstr(chain->sort8);
1200 const UCollator * icu_chain_get_coll(struct icu_chain * chain)
1205 #endif /* YAZ_HAVE_ICU */
1210 * indent-tabs-mode: nil
1212 * vim: shiftwidth=4 tabstop=8 expandtab