2 * Copyright (C) 1995-2007, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: icu_I18N.c,v 1.2 2007-10-22 17:32:07 adam Exp $
14 #include <yaz/timing.h>
19 #include <yaz/icu_I18N.h>
27 #include <unicode/ustring.h> /* some more string fcns*/
28 #include <unicode/uchar.h> /* char names */
31 #include <unicode/ucol.h>
/*
 * Tests an ICU status code with U_FAILURE(). On failure the numeric code
 * and its u_errorName() text are formatted with the pattern ICU: %d %s.
 * NOTE(review): the logging call that receives this format and the values
 * returned on either path should be confirmed against the full body.
 */
34 int icu_check_status (UErrorCode status)
36 if(U_FAILURE(status)){
38 "ICU: %d %s\n", status, u_errorName(status));
/*
 * Allocates a new icu_buf_utf16 with malloc() together with a UChar array
 * of the requested capacity (counted in UChar units). The first code unit
 * is set to 0 and utf16_cap records the capacity.
 * NOTE(review): neither malloc() result is visibly checked. Writing
 * utf16[0] needs capacity >= 1, and callers in this file pass 0 - confirm
 * the array allocation is guarded for that case.
 */
47 struct icu_buf_utf16 * icu_buf_utf16_create(size_t capacity)
49 struct icu_buf_utf16 * buf16
50 = (struct icu_buf_utf16 *) malloc(sizeof(struct icu_buf_utf16));
57 buf16->utf16 = (UChar *) malloc(sizeof(UChar) * capacity);
58 buf16->utf16[0] = (UChar) 0;
59 buf16->utf16_cap = capacity;
/*
 * Changes the UChar capacity of buf16: malloc() when buf16->utf16 is still
 * null, realloc() otherwise. utf16[0] is then set to 0 and utf16_cap is
 * updated, so callers should treat earlier content as discarded.
 * NOTE(review): the realloc() result appears to be stored straight back
 * into the buffer; if so a failed realloc() would lose the old block.
 * Allocation results are not visibly checked - TODO confirm.
 */
64 struct icu_buf_utf16 * icu_buf_utf16_resize(struct icu_buf_utf16 * buf16,
69 if (0 == buf16->utf16)
70 buf16->utf16 = (UChar *) malloc(sizeof(UChar) * capacity);
73 = (UChar *) realloc(buf16->utf16, sizeof(UChar) * capacity);
74 buf16->utf16[0] = (UChar) 0;
76 buf16->utf16_cap = capacity;
/*
 * Copies the first src16->utf16_len code units of src16 into dest16 with
 * u_strncpy() and sets dest16->utf16_len to the same value. When dest16 has
 * less capacity than the source length it is first resized to twice that
 * length.
 * Returns a buffer pointer; icu_normalizer_normalize() in this file treats
 * a null result as failure.
 */
91 struct icu_buf_utf16 * icu_buf_utf16_copy(struct icu_buf_utf16 * dest16,
92 struct icu_buf_utf16 * src16)
98 if (dest16->utf16_cap < src16->utf16_len)
99 icu_buf_utf16_resize(dest16, src16->utf16_len * 2);
101 u_strncpy(dest16->utf16, src16->utf16, src16->utf16_len);
102 dest16->utf16_len = src16->utf16_len;
/*
 * Releases a UTF-16 buffer such as those made by icu_buf_utf16_create().
 * NOTE(review): presumably frees both the UChar array and the struct;
 * confirm, including how a null argument is handled.
 */
108 void icu_buf_utf16_destroy(struct icu_buf_utf16 * buf16)
/*
 * Allocates a new icu_buf_utf8 with malloc() together with a byte array of
 * the requested capacity. The first byte is set to 0 and utf8_cap records
 * the capacity.
 * NOTE(review): neither malloc() result is visibly checked. Writing
 * utf8[0] needs capacity >= 1, and icu_chain_create() passes 0 - confirm
 * the array allocation is guarded for that case.
 */
122 struct icu_buf_utf8 * icu_buf_utf8_create(size_t capacity)
124 struct icu_buf_utf8 * buf8
125 = (struct icu_buf_utf8 *) malloc(sizeof(struct icu_buf_utf8));
132 buf8->utf8 = (uint8_t *) malloc(sizeof(uint8_t) * capacity);
133 buf8->utf8[0] = (uint8_t) 0;
134 buf8->utf8_cap = capacity;
/*
 * Changes the byte capacity of buf8 using malloc() or realloc() and records
 * the new value in utf8_cap.
 * icu_buf_utf8_to_cstr() uses the return value as the buffer to continue
 * with.
 * NOTE(review): presumably malloc() is used only while buf8->utf8 is null,
 * mirroring the UTF-16 variant; allocation results are not visibly
 * checked - TODO confirm.
 */
141 struct icu_buf_utf8 * icu_buf_utf8_resize(struct icu_buf_utf8 * buf8,
147 buf8->utf8 = (uint8_t *) malloc(sizeof(uint8_t) * capacity);
150 = (uint8_t *) realloc(buf8->utf8,
151 sizeof(uint8_t) * capacity);
152 buf8->utf8_cap = capacity;
/*
 * Copies up to src8->utf8_len bytes of src8 into dest8 with strncpy(),
 * resizing dest8 to twice the source length first when its capacity is
 * smaller than that length.
 * NOTE(review): strncpy() stops at the first 0 byte of the source and
 * zero-fills the remainder, so this is a faithful copy only for payloads
 * without embedded 0 bytes. The update of dest8->utf8_len should be
 * confirmed against the full body.
 */
167 struct icu_buf_utf8 * icu_buf_utf8_copy(struct icu_buf_utf8 * dest8,
168 struct icu_buf_utf8 * src8)
175 if (dest8->utf8_cap < src8->utf8_len)
176 icu_buf_utf8_resize(dest8, src8->utf8_len * 2);
178 strncpy((char*) dest8->utf8, (char*) src8->utf8, src8->utf8_len);
/*
 * Returns the content of src8 as a 0-terminated C string.
 * The terminator is written in place at index utf8_len; when the buffer is
 * exactly full (utf8_len == utf8_cap) it is first resized to
 * utf8_len * 2 + 1 bytes to make room.
 * A null or empty buffer takes a separate early branch (presumably
 * returning an empty string - TODO confirm).
 * The returned pointer aliases src8->utf8, so it is only valid until the
 * buffer is next resized or destroyed.
 */
184 const char *icu_buf_utf8_to_cstr(struct icu_buf_utf8 *src8)
186 if (!src8 || src8->utf8_len == 0)
188 if (src8->utf8_len == src8->utf8_cap)
189 src8 = icu_buf_utf8_resize(src8, src8->utf8_len * 2 + 1);
190 src8->utf8[src8->utf8_len] = '\0';
191 return (const char *) src8->utf8;
/*
 * Releases a UTF-8 buffer such as those made by icu_buf_utf8_create().
 * NOTE(review): presumably frees both the byte array and the struct;
 * confirm, including how a null argument is handled.
 */
195 void icu_buf_utf8_destroy(struct icu_buf_utf8 * buf8)
/*
 * Converts the utf8_len bytes held in src8 to UTF-16 in dest16 using
 * u_strFromUTF8().
 * If the first attempt leaves *status at U_BUFFER_OVERFLOW_ERROR, dest16 is
 * resized to utf16_len * 2, *status is reset to U_ZERO_ERROR and the
 * conversion is run once more.
 * When *status indicates success and the result fits the capacity,
 * dest16->utf16_len is set to the converted length; the other visible path
 * empties dest16 (utf16[0] = 0, utf16_len = 0).
 * NOTE(review): utf16_len is presumably the required length reported by
 * u_strFromUTF8() - TODO confirm. The returned UErrorCode is presumably
 * *status.
 */
206 UErrorCode icu_utf16_from_utf8(struct icu_buf_utf16 * dest16,
207 struct icu_buf_utf8 * src8,
210 int32_t utf16_len = 0;
212 u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
214 (const char *) src8->utf8, src8->utf8_len, status);
216 /* check for buffer overflow, resize and retry */
217 if (*status == U_BUFFER_OVERFLOW_ERROR)
219 icu_buf_utf16_resize(dest16, utf16_len * 2);
220 *status = U_ZERO_ERROR;
221 u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
223 (const char *) src8->utf8, src8->utf8_len, status);
226 if (U_SUCCESS(*status)
227 && utf16_len <= dest16->utf16_cap)
228 dest16->utf16_len = utf16_len;
230 dest16->utf16[0] = (UChar) 0;
231 dest16->utf16_len = 0;
/*
 * Converts a 0-terminated UTF-8 C string to UTF-16 in dest16 using
 * u_strFromUTF8(). The input length is taken with strlen(), so src8cstr
 * must not be null.
 * If the first attempt leaves *status at U_BUFFER_OVERFLOW_ERROR, dest16 is
 * resized to utf16_len * 2, *status is reset to U_ZERO_ERROR and the
 * conversion is run once more.
 * When *status indicates success and the result fits the capacity,
 * dest16->utf16_len is set to the converted length; the other visible path
 * empties dest16 (utf16[0] = 0, utf16_len = 0).
 * NOTE(review): utf16_len is presumably the required length reported by
 * u_strFromUTF8() - TODO confirm. The returned UErrorCode is presumably
 * *status.
 */
239 UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 * dest16,
240 const char * src8cstr,
243 size_t src8cstr_len = 0;
244 int32_t utf16_len = 0;
246 src8cstr_len = strlen(src8cstr);
248 u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
250 src8cstr, src8cstr_len, status);
252 /* check for buffer overflow, resize and retry */
253 if (*status == U_BUFFER_OVERFLOW_ERROR)
255 icu_buf_utf16_resize(dest16, utf16_len * 2);
256 *status = U_ZERO_ERROR;
257 u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
259 src8cstr, src8cstr_len, status);
262 if (U_SUCCESS(*status)
263 && utf16_len <= dest16->utf16_cap)
264 dest16->utf16_len = utf16_len;
266 dest16->utf16[0] = (UChar) 0;
267 dest16->utf16_len = 0;
/*
 * Converts the utf16_len code units held in src16 to UTF-8 in dest8 using
 * u_strToUTF8().
 * If the first attempt leaves *status at U_BUFFER_OVERFLOW_ERROR, dest8 is
 * resized to utf8_len * 2, *status is reset to U_ZERO_ERROR and the
 * conversion is run once more.
 * When *status indicates success and the result fits the capacity,
 * dest8->utf8_len is set to the converted length; the other visible path
 * writes 0 to utf8[0].
 * NOTE(review): utf8_len is presumably the required length reported by
 * u_strToUTF8() - TODO confirm. The returned UErrorCode is presumably
 * *status.
 */
276 UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 * dest8,
277 struct icu_buf_utf16 * src16,
280 int32_t utf8_len = 0;
282 u_strToUTF8((char *) dest8->utf8, dest8->utf8_cap,
284 src16->utf16, src16->utf16_len, status);
286 /* check for buffer overflow, resize and retry */
287 if (*status == U_BUFFER_OVERFLOW_ERROR)
289 icu_buf_utf8_resize(dest8, utf8_len * 2);
290 *status = U_ZERO_ERROR;
291 u_strToUTF8((char *) dest8->utf8, dest8->utf8_cap,
293 src16->utf16, src16->utf16_len, status);
297 if (U_SUCCESS(*status)
298 && utf8_len <= dest8->utf8_cap)
299 dest8->utf8_len = utf8_len;
301 dest8->utf8[0] = (uint8_t) 0;
/*
 * Allocates an icu_casemap and stores the locale string and the action
 * character in it. A switch on the action follows; the visible failure path
 * hands the new object to icu_casemap_destroy().
 * NOTE(review): strcpy() into casemap->locale is unbounded and the field
 * size is declared elsewhere - confirm callers cannot pass a longer locale.
 * The malloc() result is not visibly checked before use.
 */
310 struct icu_casemap * icu_casemap_create(const char *locale, char action,
313 struct icu_casemap * casemap
314 = (struct icu_casemap *) malloc(sizeof(struct icu_casemap));
315 strcpy(casemap->locale, locale);
316 casemap->action = action;
318 switch(casemap->action) {
328 icu_casemap_destroy(casemap);
/*
 * Releases an icu_casemap; icu_casemap_create() calls it on its failure
 * path and icu_chain_step_destroy() calls it for casemap steps.
 * NOTE(review): presumably a plain free of the struct - TODO confirm.
 */
335 void icu_casemap_destroy(struct icu_casemap * casemap)
/*
 * Applies the case mapping configured in casemap to src16, writing the
 * result to dest16. This forwards to icu_utf16_casemap() with the stored
 * locale and action and returns its result.
 */
342 int icu_casemap_casemap(struct icu_casemap * casemap,
343 struct icu_buf_utf16 * dest16,
344 struct icu_buf_utf16 * src16,
350 return icu_utf16_casemap(dest16, src16,
351 casemap->locale, casemap->action, status);
/*
 * Case-maps src16 into dest16 according to action, using one of
 * u_strToLower(), u_strToUpper(), u_strToTitle() or u_strFoldCase().
 * Case folding is called with U_FOLD_CASE_DEFAULT. An action that matches
 * none of the supported values returns U_UNSUPPORTED_ERROR.
 *
 * If the first attempt leaves *status at U_BUFFER_OVERFLOW_ERROR and dest16
 * is a different object from src16, dest16 is resized to dest16_len * 2,
 * *status is reset to U_ZERO_ERROR and the same mapping is run once more.
 * An in-place call (dest16 == src16) is never resized.
 *
 * When *status indicates success and the result fits the capacity,
 * dest16->utf16_len is set to the mapped length; the other visible path
 * empties dest16 (utf16[0] = 0, utf16_len = 0).
 *
 * NOTE(review): the declared return type is int while the unsupported
 * branches return a UErrorCode value - confirm what callers rely on.
 */
355 int icu_utf16_casemap(struct icu_buf_utf16 * dest16,
356 struct icu_buf_utf16 * src16,
357 const char *locale, char action,
360 int32_t dest16_len = 0;
364 dest16_len = u_strToLower(dest16->utf16, dest16->utf16_cap,
365 src16->utf16, src16->utf16_len,
369 dest16_len = u_strToUpper(dest16->utf16, dest16->utf16_cap,
370 src16->utf16, src16->utf16_len,
374 dest16_len = u_strToTitle(dest16->utf16, dest16->utf16_cap,
375 src16->utf16, src16->utf16_len,
379 dest16_len = u_strFoldCase(dest16->utf16, dest16->utf16_cap,
380 src16->utf16, src16->utf16_len,
381 U_FOLD_CASE_DEFAULT, status);
385 return U_UNSUPPORTED_ERROR;
389 /* check for buffer overflow, resize and retry */
390 if (*status == U_BUFFER_OVERFLOW_ERROR
391 && dest16 != src16 /* do not resize if in-place conversion */
393 icu_buf_utf16_resize(dest16, dest16_len * 2);
394 *status = U_ZERO_ERROR;
399 dest16_len = u_strToLower(dest16->utf16, dest16->utf16_cap,
400 src16->utf16, src16->utf16_len,
404 dest16_len = u_strToUpper(dest16->utf16, dest16->utf16_cap,
405 src16->utf16, src16->utf16_len,
409 dest16_len = u_strToTitle(dest16->utf16, dest16->utf16_cap,
410 src16->utf16, src16->utf16_len,
414 dest16_len = u_strFoldCase(dest16->utf16, dest16->utf16_cap,
415 src16->utf16, src16->utf16_len,
416 U_FOLD_CASE_DEFAULT, status);
420 return U_UNSUPPORTED_ERROR;
425 if (U_SUCCESS(*status)
426 && dest16_len <= dest16->utf16_cap)
427 dest16->utf16_len = dest16_len;
429 dest16->utf16[0] = (UChar) 0;
430 dest16->utf16_len = 0;
/*
 * Computes the collation sort key of src16 with ucol_getSortKey() and
 * stores the key bytes in dest8.
 * When the reported key length exceeds dest8->utf8_cap, dest8 is resized to
 * twice that length and the key is computed again.
 * On the success path dest8->utf8_len is set to the key length; the other
 * visible path writes 0 to utf8[0].
 *
 * NOTE(review): ucol_getSortKey() is not given the status pointer, so the
 * U_SUCCESS(*status) test only reflects the value the caller passed in -
 * confirm that is intended.
 * NOTE(review): the reset uses a (UChar) cast on a byte buffer element;
 * the stored value is still 0.
 */
438 UErrorCode icu_sortkey8_from_utf16(UCollator *coll,
439 struct icu_buf_utf8 * dest8,
440 struct icu_buf_utf16 * src16,
444 int32_t sortkey_len = 0;
446 sortkey_len = ucol_getSortKey(coll, src16->utf16, src16->utf16_len,
447 dest8->utf8, dest8->utf8_cap);
449 /* check for buffer overflow, resize and retry */
450 if (sortkey_len > dest8->utf8_cap) {
451 icu_buf_utf8_resize(dest8, sortkey_len * 2);
452 sortkey_len = ucol_getSortKey(coll, src16->utf16, src16->utf16_len,
453 dest8->utf8, dest8->utf8_cap);
456 if (U_SUCCESS(*status)
458 dest8->utf8_len = sortkey_len;
460 dest8->utf8[0] = (UChar) 0;
/*
 * Allocates an icu_tokenizer for the given locale and action.
 * The locale and action are stored, the attached buffer and all token
 * bookkeeping fields are zeroed, and a switch on the action opens an ICU
 * break iterator of kind UBRK_LINE, UBRK_SENTENCE, UBRK_WORD,
 * UBRK_CHARACTER or UBRK_TITLE for that locale. Any other action sets
 * *status to U_UNSUPPORTED_ERROR.
 * If *status does not indicate success afterwards, the tokenizer is passed
 * to icu_tokenizer_destroy().
 *
 * NOTE(review): strcpy() into tokenizer->locale is unbounded and the field
 * size is declared elsewhere - confirm callers cannot pass a longer locale.
 * The malloc() result is not visibly checked before use.
 */
469 struct icu_tokenizer * icu_tokenizer_create(const char *locale, char action,
472 struct icu_tokenizer * tokenizer
473 = (struct icu_tokenizer *) malloc(sizeof(struct icu_tokenizer));
475 strcpy(tokenizer->locale, locale);
476 tokenizer->action = action;
478 tokenizer->buf16 = 0;
479 tokenizer->token_count = 0;
480 tokenizer->token_id = 0;
481 tokenizer->token_start = 0;
482 tokenizer->token_end = 0;
485 switch(tokenizer->action) {
488 = ubrk_open(UBRK_LINE, tokenizer->locale,
493 = ubrk_open(UBRK_SENTENCE, tokenizer->locale,
498 = ubrk_open(UBRK_WORD, tokenizer->locale,
503 = ubrk_open(UBRK_CHARACTER, tokenizer->locale,
508 = ubrk_open(UBRK_TITLE, tokenizer->locale,
512 *status = U_UNSUPPORTED_ERROR;
517 /* ICU error stuff is a very funny business */
518 if (U_SUCCESS(*status))
521 /* freeing if failed */
522 icu_tokenizer_destroy(tokenizer);
/*
 * Releases a tokenizer: its break iterator is closed with ubrk_close().
 * NOTE(review): presumably the struct itself is freed as well and null
 * arguments are guarded - TODO confirm.
 */
526 void icu_tokenizer_destroy(struct icu_tokenizer * tokenizer)
530 ubrk_close(tokenizer->bi);
/*
 * Points the tokenizer at a new UTF-16 text.
 * A missing tokenizer, break iterator or source buffer takes the early
 * exit. Otherwise src16 is remembered (not copied), all token bookkeeping
 * is reset to 0, and the text is handed to the break iterator with
 * ubrk_setText(). A failure reported in *status is checked afterwards.
 * The tokenizer keeps referring to src16, so the caller must keep that
 * buffer alive and unchanged while tokens are being pulled.
 */
535 int icu_tokenizer_attach(struct icu_tokenizer * tokenizer,
536 struct icu_buf_utf16 * src16,
539 if (!tokenizer || !tokenizer->bi || !src16)
543 tokenizer->buf16 = src16;
544 tokenizer->token_count = 0;
545 tokenizer->token_id = 0;
546 tokenizer->token_start = 0;
547 tokenizer->token_end = 0;
549 ubrk_setText(tokenizer->bi, src16->utf16, src16->utf16_len, status);
552 if (U_FAILURE(*status))
/*
 * Advances the attached break iterator by one boundary and reports the
 * text between the previous boundary and the new one as the next token.
 *
 * A missing tokenizer, break iterator or attached buffer, or an attached
 * buffer of length 0, takes the early exit.
 * The token start is ubrk_first() on the first call (token_end still 0)
 * and the previous token_end on later calls. The token end is ubrk_next(),
 * with UBRK_DONE replaced by the length of the attached buffer.
 * token_count, token_id, token_start and token_end are then updated. If a
 * token buffer is supplied, the token code units are copied into tkn16
 * (resized to twice the token length when too small) and tkn16->utf16_len
 * is set to the token length.
 *
 * The in-body note describes the return value as the token length; the
 * chain code in this file uses the result as its more-tokens flag, so a
 * result of 0 apparently signals that the text is exhausted.
 */
558 int32_t icu_tokenizer_next_token(struct icu_tokenizer * tokenizer,
559 struct icu_buf_utf16 * tkn16,
562 int32_t tkn_start = 0;
567 if (!tokenizer || !tokenizer->bi
568 || !tokenizer->buf16 || !tokenizer->buf16->utf16_len)
572 never change tokenizer->buf16 and keep always invariant
573 0 <= tokenizer->token_start
574 <= tokenizer->token_end
575 <= tokenizer->buf16->utf16_len
576 returns length of token
579 if (0 == tokenizer->token_end) /* first call */
580 tkn_start = ubrk_first(tokenizer->bi);
581 else /* successive calls */
582 tkn_start = tokenizer->token_end;
584 /* get next position */
585 tkn_end = ubrk_next(tokenizer->bi);
587 /* repairing invariant at end of ubrk, which is UBRK_DONE = -1 */
588 if (UBRK_DONE == tkn_end)
589 tkn_end = tokenizer->buf16->utf16_len;
591 /* copy out if everything is well */
592 if(U_FAILURE(*status))
595 /* everything OK, now update internal state */
596 tkn_len = tkn_end - tkn_start;
599 tokenizer->token_count++;
600 tokenizer->token_id++;
602 tokenizer->token_id = 0;
604 tokenizer->token_start = tkn_start;
605 tokenizer->token_end = tkn_end;
608 /* copying into token buffer if it exists */
610 if (tkn16->utf16_cap < tkn_len)
611 icu_buf_utf16_resize(tkn16, (size_t) tkn_len * 2);
613 u_strncpy(tkn16->utf16, &(tokenizer->buf16->utf16)[tkn_start],
616 tkn16->utf16_len = tkn_len;
/*
 * Returns the token_id field of the tokenizer, as last set by
 * icu_tokenizer_next_token(). The tokenizer pointer is dereferenced as is.
 */
623 int32_t icu_tokenizer_token_id(struct icu_tokenizer * tokenizer)
625 return tokenizer->token_id;
/*
 * Returns the start offset of the current token, as an index into the
 * attached UTF-16 buffer. The tokenizer pointer is dereferenced as is.
 */
628 int32_t icu_tokenizer_token_start(struct icu_tokenizer * tokenizer)
630 return tokenizer->token_start;
/*
 * Returns the end offset of the current token, as an index into the
 * attached UTF-16 buffer. The tokenizer pointer is dereferenced as is.
 */
633 int32_t icu_tokenizer_token_end(struct icu_tokenizer * tokenizer)
635 return tokenizer->token_end;
/*
 * Returns the length of the current token in UTF-16 code units, computed
 * as token_end - token_start. The tokenizer pointer is dereferenced as is.
 */
638 int32_t icu_tokenizer_token_length(struct icu_tokenizer * tokenizer)
640 return (tokenizer->token_end - tokenizer->token_start);
/*
 * Returns the token_count field of the tokenizer; the counter is reset by
 * icu_tokenizer_attach() and incremented in icu_tokenizer_next_token().
 * The tokenizer pointer is dereferenced as is.
 */
643 int32_t icu_tokenizer_token_count(struct icu_tokenizer * tokenizer)
645 return tokenizer->token_count;
/*
 * Allocates an icu_normalizer from a UTF-8 rule string.
 * The rules are converted to UTF-16 into a freshly created rules16 buffer,
 * and a switch on the action opens an ICU transliterator from them with
 * utrans_openU(), recording parse problems in normalizer->parse_error. Any
 * other action sets *status to U_UNSUPPORTED_ERROR.
 * If *status does not indicate success afterwards, the normalizer is passed
 * to icu_normalizer_destroy().
 *
 * NOTE(review): the malloc() result is not visibly checked, and rules is
 * passed on to a strlen()-based conversion, so it must not be null.
 * NOTE(review): the two utrans_openU() calls presumably differ in
 * direction - TODO confirm.
 */
650 struct icu_normalizer * icu_normalizer_create(const char *rules, char action,
654 struct icu_normalizer * normalizer
655 = (struct icu_normalizer *) malloc(sizeof(struct icu_normalizer));
657 normalizer->action = action;
658 normalizer->trans = 0;
659 normalizer->rules16 = icu_buf_utf16_create(0);
660 icu_utf16_from_utf8_cstr(normalizer->rules16, rules, status);
662 switch(normalizer->action) {
665 = utrans_openU(normalizer->rules16->utf16,
666 normalizer->rules16->utf16_len,
669 normalizer->parse_error, status);
673 = utrans_openU(normalizer->rules16->utf16,
674 normalizer->rules16->utf16_len,
677 normalizer->parse_error, status);
680 *status = U_UNSUPPORTED_ERROR;
685 if (U_SUCCESS(*status))
688 /* freeing if failed */
689 icu_normalizer_destroy(normalizer);
/*
 * Releases a normalizer: the UTF-16 rules buffer is destroyed when present
 * and the transliterator is closed with utrans_close() when present.
 * NOTE(review): presumably the struct itself is freed as well and a null
 * argument is guarded - TODO confirm.
 */
694 void icu_normalizer_destroy(struct icu_normalizer * normalizer){
696 if (normalizer->rules16)
697 icu_buf_utf16_destroy(normalizer->rules16);
698 if (normalizer->trans)
700 utrans_close(normalizer->trans);
/*
 * Runs the normalizer's transliterator over src16, leaving the result in
 * dest16, and returns the resulting length dest16->utf16_len.
 * A missing normalizer, transliterator, source or destination takes the
 * early exit. The source is first copied into dest16 and then rewritten in
 * place by utrans_transUChars(). If *status reports a failure afterwards,
 * dest16 is emptied (utf16[0] = 0, utf16_len = 0).
 *
 * NOTE(review): the limit argument of utrans_transUChars() is given as the
 * address of src16->utf16_len. ICU documents limit as an in/out parameter,
 * so the source buffer length may be rewritten by this call - confirm that
 * is intended.
 */
708 int icu_normalizer_normalize(struct icu_normalizer * normalizer,
709 struct icu_buf_utf16 * dest16,
710 struct icu_buf_utf16 * src16,
713 if (!normalizer || !normalizer->trans || !src16 || !dest16)
716 if (!icu_buf_utf16_copy(dest16, src16))
719 utrans_transUChars (normalizer->trans,
720 dest16->utf16, &(dest16->utf16_len),
722 0, &(src16->utf16_len), status);
724 if (U_FAILURE(*status)){
725 dest16->utf16[0] = (UChar) 0;
726 dest16->utf16_len = 0;
/*
 * Allocates one processing step of a chain and creates the helper object
 * its type needs.
 * A null chain, a type of value 0 or a null rule takes the early exit.
 * Helper objects by type:
 *   casemap   - icu_casemap_create() with the chain locale and the first
 *               byte of rule as the action;
 *   normalize - icu_normalizer_create() with the whole rule string and
 *               action f;
 *   tokenize  - icu_tokenizer_create() with the chain locale and the first
 *               byte of rule as the action.
 * The display, index and sortkey cases create no visible helper object.
 * NOTE(review): buf16 is presumably stored in the step as its output
 * buffer, and the malloc() result is not visibly checked - TODO confirm.
 */
735 struct icu_chain_step * icu_chain_step_create(struct icu_chain * chain,
736 enum icu_chain_step_type type,
737 const uint8_t * rule,
738 struct icu_buf_utf16 * buf16,
741 struct icu_chain_step * step = 0;
743 if(!chain || !type || !rule)
746 step = (struct icu_chain_step *) malloc(sizeof(struct icu_chain_step));
752 /* create auxilary objects */
754 case ICU_chain_step_type_display:
756 case ICU_chain_step_type_index:
758 case ICU_chain_step_type_sortkey:
760 case ICU_chain_step_type_casemap:
761 step->u.casemap = icu_casemap_create((char *) chain->locale,
762 (char) rule[0], status);
764 case ICU_chain_step_type_normalize:
765 step->u.normalizer = icu_normalizer_create((char *) rule, 'f', status);
767 case ICU_chain_step_type_tokenize:
768 step->u.tokenizer = icu_tokenizer_create((char *) chain->locale,
769 (char) rule[0], status);
/*
 * Destroys a chain step together with every step linked before it.
 * The function recurses into step->previous, then releases what the step
 * type owns: casemap, normalize and tokenize steps destroy their helper
 * object and their UTF-16 output buffer. The display, index and sortkey
 * cases release nothing visible.
 * NOTE(review): presumably guarded against a null step and followed by a
 * free of the step itself - TODO confirm.
 */
779 void icu_chain_step_destroy(struct icu_chain_step * step){
784 icu_chain_step_destroy(step->previous);
787 case ICU_chain_step_type_display:
789 case ICU_chain_step_type_index:
791 case ICU_chain_step_type_sortkey:
793 case ICU_chain_step_type_casemap:
794 icu_casemap_destroy(step->u.casemap);
795 icu_buf_utf16_destroy(step->buf16);
797 case ICU_chain_step_type_normalize:
798 icu_normalizer_destroy(step->u.normalizer);
799 icu_buf_utf16_destroy(step->buf16);
801 case ICU_chain_step_type_tokenize:
802 icu_tokenizer_destroy(step->u.tokenizer);
803 icu_buf_utf16_destroy(step->buf16);
/*
 * Allocates an empty icu_chain.
 * The identifier and locale are copied with strncpy() into fixed fields and
 * explicitly terminated, so at most 127 identifier bytes and 15 locale
 * bytes are kept; longer inputs are silently truncated.
 * The token counter starts at 0, and the three UTF-8 output buffers
 * (display8, norm8, sort8) and the UTF-16 source buffer are created with
 * capacity 0.
 * NOTE(review): the malloc() result is not visibly checked, and both string
 * arguments must be non-null.
 */
813 struct icu_chain * icu_chain_create(const uint8_t * identifier,
814 const uint8_t * locale)
817 struct icu_chain * chain
818 = (struct icu_chain *) malloc(sizeof(struct icu_chain));
820 strncpy((char *) chain->identifier, (const char *) identifier, 128);
821 chain->identifier[128 - 1] = '\0';
822 strncpy((char *) chain->locale, (const char *) locale, 16);
823 chain->locale[16 - 1] = '\0';
825 chain->token_count = 0;
827 chain->display8 = icu_buf_utf8_create(0);
828 chain->norm8 = icu_buf_utf8_create(0);
829 chain->sort8 = icu_buf_utf8_create(0);
831 chain->src16 = icu_buf_utf16_create(0);
/*
 * Destroys a chain: its three UTF-8 output buffers, its UTF-16 source
 * buffer and its list of steps (through icu_chain_step_destroy(), which
 * walks the previous links).
 * NOTE(review): presumably guarded against a null chain and followed by a
 * free of the chain itself - TODO confirm.
 */
839 void icu_chain_destroy(struct icu_chain * chain)
842 icu_buf_utf8_destroy(chain->display8);
843 icu_buf_utf8_destroy(chain->norm8);
844 icu_buf_utf8_destroy(chain->sort8);
846 icu_buf_utf16_destroy(chain->src16);
848 icu_chain_step_destroy(chain->steps);
/*
 * Builds an icu_chain from an XML element named icu_chain.
 *
 * The node must be an element with that name and must carry non-empty id
 * and locale attributes, which are passed to icu_chain_create().
 * Each child element is then mapped by name to a chain step; non-element
 * children are filtered by a node type test. The casemap, normalize and
 * tokenize children take their rule from the rule attribute; display,
 * index and sortkey children use an empty rule.
 * If a step could not be created or *status reports a failure, the chain is
 * destroyed.
 *
 * NOTE(review): libxml2 documents that xmlGetProp() results must be
 * released with xmlFree(); no release is visible for xml_id, xml_locale or
 * xml_rule - confirm these are not leaked.
 * NOTE(review): step starts as null for every child, so a child matching
 * none of the six names would reach the failure path unless handled
 * separately - TODO confirm.
 */
855 struct icu_chain * icu_chain_xml_config(xmlNode *xml_node,
856 UErrorCode * status){
859 struct icu_chain * chain = 0;
862 ||xml_node->type != XML_ELEMENT_NODE
863 || strcmp((const char *) xml_node->name, "icu_chain"))
868 xmlChar *xml_id = xmlGetProp(xml_node, (xmlChar *) "id");
869 xmlChar *xml_locale = xmlGetProp(xml_node, (xmlChar *) "locale");
871 if (!xml_id || !strlen((const char *) xml_id)
872 || !xml_locale || !strlen((const char *) xml_locale))
875 chain = icu_chain_create((const uint8_t *) xml_id,
876 (const uint8_t *) xml_locale);
884 for (node = xml_node->children; node; node = node->next)
887 struct icu_chain_step * step = 0;
889 if (node->type != XML_ELEMENT_NODE)
892 xml_rule = xmlGetProp(node, (xmlChar *) "rule");
894 if (!strcmp((const char *) node->name,
895 (const char *) "casemap")){
896 step = icu_chain_insert_step(chain, ICU_chain_step_type_casemap,
897 (const uint8_t *) xml_rule, status);
899 else if (!strcmp((const char *) node->name,
900 (const char *) "normalize")){
901 step = icu_chain_insert_step(chain, ICU_chain_step_type_normalize,
902 (const uint8_t *) xml_rule, status);
904 else if (!strcmp((const char *) node->name,
905 (const char *) "tokenize")){
906 step = icu_chain_insert_step(chain, ICU_chain_step_type_tokenize,
907 (const uint8_t *) xml_rule, status);
909 else if (!strcmp((const char *) node->name,
910 (const char *) "display")){
911 step = icu_chain_insert_step(chain, ICU_chain_step_type_display,
912 (const uint8_t *) "", status);
914 else if (!strcmp((const char *) node->name,
915 (const char *) "index")){
916 step = icu_chain_insert_step(chain, ICU_chain_step_type_index,
917 (const uint8_t *) "", status);
919 else if (!strcmp((const char *) node->name,
920 (const char *) "sortkey")){
921 step = icu_chain_insert_step(chain, ICU_chain_step_type_sortkey,
922 (const uint8_t *) "", status);
926 if (!step || U_FAILURE(*status)){
927 icu_chain_destroy(chain);
/*
 * Creates a new step of the given type and links it in front of the
 * existing steps of the chain.
 * A null chain, a type of value 0 or a null rule takes the early exit.
 * The source buffer is the output buffer of the current newest step when
 * there is one, otherwise the chain source buffer. Casemap, normalize and
 * tokenize steps get a fresh UTF-16 output buffer of capacity 0. The step
 * is built by icu_chain_step_create() and its previous link is set to the
 * current chain->steps.
 *
 * NOTE(review): display, index and sortkey steps presumably reuse the
 * source buffer instead of allocating one, and chain->steps is presumably
 * updated to the new step afterwards - TODO confirm. The result of
 * icu_chain_step_create() is used without a visible null check.
 */
939 struct icu_chain_step * icu_chain_insert_step(struct icu_chain * chain,
940 enum icu_chain_step_type type,
941 const uint8_t * rule,
944 struct icu_chain_step * step = 0;
945 struct icu_buf_utf16 * src16 = 0;
946 struct icu_buf_utf16 * buf16 = 0;
948 if (!chain || !type || !rule)
951 /* assign utf16 src buffers as needed */
952 if (chain->steps && chain->steps->buf16)
953 src16 = chain->steps->buf16;
954 else if (chain->src16)
955 src16 = chain->src16;
960 /* create utf16 destination buffers as needed, or */
962 case ICU_chain_step_type_display:
965 case ICU_chain_step_type_index:
968 case ICU_chain_step_type_sortkey:
971 case ICU_chain_step_type_casemap:
972 buf16 = icu_buf_utf16_create(0);
974 case ICU_chain_step_type_normalize:
975 buf16 = icu_buf_utf16_create(0);
977 case ICU_chain_step_type_tokenize:
978 buf16 = icu_buf_utf16_create(0);
984 /* create actual chain step with this buffer */
985 step = icu_chain_step_create(chain, type, rule, buf16, status);
987 step->previous = chain->steps;
/*
 * Produces the next output token of one chain step, pulling input from the
 * preceding step on demand.
 *
 * A null chain, a chain without source buffer, a null step or a step whose
 * more_tokens flag is already clear takes the early exit.
 * Input selection: a step with a predecessor reads the predecessor output
 * buffer and, when need_new_token is set, first recurses into the
 * predecessor. The first step reads chain->src16 directly.
 * Work by step type:
 *   display / index / sortkey - convert the input to UTF-8 into
 *       chain->display8 / chain->norm8 / chain->sort8;
 *   casemap / normalize - transform the input into step->buf16;
 *   tokenize - attach the tokenizer to the input when a new input token is
 *       needed, then split it into successive step->buf16 tokens. When the
 *       current input is used up and a predecessor exists, a fresh input
 *       token is fetched and tokenizing restarts on it.
 * A first step that needs a new token has nothing to pull from, so its
 * more_tokens flag is cleared. A failure in *status is checked last.
 *
 * Callers in this file test the result for truth to decide whether a token
 * was produced.
 * NOTE(review): the sortkey step visibly calls icu_utf16_to_utf8() rather
 * than a collation routine - confirm this is intended.
 */
994 int icu_chain_step_next_token(struct icu_chain * chain,
995 struct icu_chain_step * step,
998 struct icu_buf_utf16 * src16 = 0;
1000 if (!chain || !chain->src16 || !step || !step->more_tokens)
1003 /* assign utf16 src buffers as neeed, advance in previous steps
1004 tokens until non-zero token met, and setting stop condition
1006 if (step->previous){
1007 src16 = step->previous->buf16;
1008 if (step->need_new_token)
1010 = icu_chain_step_next_token(chain, step->previous, status);
1012 else { /* first step can only work once on chain->src16 input buffer */
1013 src16 = chain->src16;
1014 step->more_tokens = 1;
1017 /* stop if nothing to process
1018 i.e new token source was not properly assigned
1020 if (!step->more_tokens || !src16)
1023 /* perform the work, eventually put this steps output in
1024 step->buf16 or the chains UTF8 output buffers */
1025 switch(step->type) {
1026 case ICU_chain_step_type_display:
1027 icu_utf16_to_utf8(chain->display8, src16, status);
1029 case ICU_chain_step_type_index:
1030 icu_utf16_to_utf8(chain->norm8, src16, status);
1032 case ICU_chain_step_type_sortkey:
1033 icu_utf16_to_utf8(chain->sort8, src16, status);
1035 case ICU_chain_step_type_casemap:
1036 icu_casemap_casemap(step->u.casemap,
1037 step->buf16, src16, status);
1039 case ICU_chain_step_type_normalize:
1040 icu_normalizer_normalize(step->u.normalizer,
1041 step->buf16, src16, status);
1043 case ICU_chain_step_type_tokenize:
1044 /* attach to new src16 token only first time during splitting */
1045 if (step->need_new_token){
1046 icu_tokenizer_attach(step->u.tokenizer, src16, status);
1047 step->need_new_token = 0;
1049 /* splitting one src16 token into multiple buf16 tokens */
1051 = icu_tokenizer_next_token(step->u.tokenizer,
1052 step->buf16, status);
1053 /* make sure to get new previous token if this one had been used up */
1054 if (step->previous && !step->more_tokens){
1055 if (icu_chain_step_next_token(chain, step->previous, status)){
1056 icu_tokenizer_attach(step->u.tokenizer, src16, status);
1057 step->need_new_token = 0;
1059 = icu_tokenizer_next_token(step->u.tokenizer,
1060 step->buf16, status);
1063 if (0 == step->more_tokens)
1073 /* stop further token processing if last step and
1074 new tokens are needed from previous (non-existing) step
1076 if (!step->previous && step->need_new_token)
1077 step->more_tokens = 0;
1079 if (U_FAILURE(*status))
/*
 * Loads a new 0-terminated UTF-8 input string into the chain.
 * A null chain or null string takes the early exit. Otherwise the chain
 * token counter is reset, every step reached through the previous links
 * gets more_tokens and need_new_token set back to 1, and the string is
 * converted to UTF-16 into chain->src16. A failure in *status is checked
 * afterwards.
 * NOTE(review): the walk presumably starts at chain->steps - TODO confirm.
 */
1087 int icu_chain_assign_cstr(struct icu_chain * chain,
1088 const char * src8cstr,
1091 struct icu_chain_step * stp = 0;
1093 if (!chain || !src8cstr)
1098 /* clear token count */
1099 chain->token_count = 0;
1101 /* clear all steps stop states */
1103 stp->more_tokens = 1;
1104 stp->need_new_token = 1;
1105 stp = stp->previous;
1108 /* finally convert UTF8 to UTF16 string */
1109 icu_utf16_from_utf8_cstr(chain->src16, src8cstr, status);
1111 if (U_FAILURE(*status))
/*
 * Advances the chain to its next token by running the newest step
 * (chain->steps), which pulls from the earlier steps as needed.
 * A null chain or a chain without steps takes the early exit. On the
 * visible success path the chain token counter is incremented and its new
 * value is returned, so a produced token yields a non-zero result.
 * NOTE(review): presumably 0 is returned when no token was produced -
 * TODO confirm.
 */
1119 int icu_chain_next_token(struct icu_chain * chain,
1124 if (!chain || !chain->steps)
1127 success = icu_chain_step_next_token(chain, chain->steps, status);
1130 chain->token_count++;
1131 return chain->token_count;
/*
 * Returns the number of tokens counted so far for the current input, as
 * kept in chain->token_count.
 * NOTE(review): presumably guarded against a null chain - TODO confirm.
 */
1137 int icu_chain_get_token_count(struct icu_chain * chain)
1142 return chain->token_count;
/*
 * Returns the display form of the current token as a C string, taken from
 * chain->display8 through icu_buf_utf8_to_cstr() when that buffer exists.
 * The pointer refers to storage owned by the chain and is overwritten by
 * later token processing.
 */
1147 const char * icu_chain_get_display(struct icu_chain * chain)
1149 if (chain->display8)
1150 return icu_buf_utf8_to_cstr(chain->display8);
/*
 * Returns the index (normalized) form of the current token as a C string,
 * taken from chain->norm8 through icu_buf_utf8_to_cstr().
 * The pointer refers to storage owned by the chain and is overwritten by
 * later token processing.
 */
1155 const char * icu_chain_get_norm(struct icu_chain * chain)
1158 return icu_buf_utf8_to_cstr(chain->norm8);
/*
 * Returns the sort form of the current token as a C string, taken from
 * chain->sort8 through icu_buf_utf8_to_cstr().
 * The pointer refers to storage owned by the chain and is overwritten by
 * later token processing.
 */
1163 const char * icu_chain_get_sort(struct icu_chain * chain)
1166 return icu_buf_utf8_to_cstr(chain->sort8);
1172 #endif /* HAVE_ICU */
1180 * indent-tabs-mode: nil
1182 * vim: shiftwidth=4 tabstop=8 expandtab