test/tst_icu_I18N.c

   1 /* $Id: tst_icu_I18N.c,v 1.8 2007-10-25 08:42:21 marc Exp $
   2    Copyright (c) 2006-2007, Index Data.
   3
   4    This file is part of Pazpar2.
   5
   6    Pazpar2 is free software; you can redistribute it and/or modify it under
   7    the terms of the GNU General Public License as published by the Free
   8    Software Foundation; either version 2, or (at your option) any later
   9    version.
  10
  11    Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
  12    WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14    for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with Pazpar2; see the file LICENSE.  If not, write to the
  18    Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  19    02111-1307, USA.
  20 */
  21
  22 /* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
  23
  24
  25 #if HAVE_CONFIG_H
  26 #include "config.h"
  27 #endif
  28
  29 #define USE_TIMING 0
  30 #if USE_TIMING
  31 #include <yaz/timing.h>
  32 #endif
  33
  34 #include <yaz/test.h>
  35
  36 #if HAVE_ICU
  37 #include <yaz/icu_I18N.h>
  38
  39 #include <string.h>
  40 #include <stdlib.h>
  41
  42 /* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
  43
  44
  45 #define MAX_KEY_SIZE 256
  46 struct icu_termmap
  47 {
  48     uint8_t sort_key[MAX_KEY_SIZE]; /* standard C string '\0' terminated */
  49     char disp_term[MAX_KEY_SIZE];  /* standard C utf-8 string */
  50 };
  51
  52
  53
  54 int icu_termmap_cmp(const void *vp1, const void *vp2)
  55 {
  56     struct icu_termmap *itmp1 = *(struct icu_termmap **) vp1;
  57     struct icu_termmap *itmp2 = *(struct icu_termmap **) vp2;
  58
  59     int cmp = 0;
  60
  61     cmp = strcmp((const char *)itmp1->sort_key,
  62                  (const char *)itmp2->sort_key);
  63     return cmp;
  64 };
  65
  66
  67
  68
  69 int test_icu_casemap(const char * locale, char action,
  70                      const char * src8cstr, const char * chk8cstr)
  71 {
  72     int success = 0;
  73     UErrorCode status = U_ZERO_ERROR;
  74
  75     struct icu_buf_utf8 * src8 = icu_buf_utf8_create(0);
  76     struct icu_buf_utf8 * dest8 = icu_buf_utf8_create(0);
  77     struct icu_buf_utf16 * src16 = icu_buf_utf16_create(0);
  78     struct icu_buf_utf16 * dest16 = icu_buf_utf16_create(0);
  79
  80
  81     int src8cstr_len = strlen(src8cstr);
  82     int chk8cstr_len = strlen(chk8cstr);
  83
  84     /* converting to UTF16 */
  85     icu_utf16_from_utf8_cstr(src16, src8cstr, &status);
  86
  87     /* perform case mapping */
  88     icu_utf16_casemap(dest16, src16, locale, action, &status);
  89
  90     /* converting to UTF8 */
  91     icu_utf16_to_utf8(dest8, dest16, &status);
  92
  93
  94
  95     /* determine success */
  96     if (dest8->utf8
  97         && (dest8->utf8_len == strlen(chk8cstr))
  98         && !strcmp(chk8cstr, (const char *) dest8->utf8))
  99         success = 1;
 100     else
 101         success = 0;
 102
 103     /* report failures */
 104     if (!success){
 105         printf("\nERROR\n");
 106         printf("original string:   '%s' (%d)\n", src8cstr, src8cstr_len);
 107         printf("icu_casemap '%s:%c' '%s' (%d)\n",
 108                locale, action, dest8->utf8, dest8->utf8_len);
 109         printf("expected string:   '%s' (%d)\n", chk8cstr, chk8cstr_len);
 110     }
 111
 112     /* clean the buffers */
 113     icu_buf_utf8_destroy(src8);
 114     icu_buf_utf8_destroy(dest8);
 115     icu_buf_utf16_destroy(src16);
 116     icu_buf_utf16_destroy(dest16);
 117
 118
 119     return success;
 120 }
 121
 122
 123
 124 /* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
 125
 126 void test_icu_I18N_casemap(int argc, char **argv)
 127 {
 128
 129     /* Locale 'en' */
 130
 131     /* successful tests */
 132     YAZ_CHECK(test_icu_casemap("en", 'l',
 133                                "A ReD fOx hunTS sQUirriLs",
 134                                "a red fox hunts squirrils"));
 135
 136     YAZ_CHECK(test_icu_casemap("en", 'u',
 137                                "A ReD fOx hunTS sQUirriLs",
 138                                "A RED FOX HUNTS SQUIRRILS"));
 139
 140     YAZ_CHECK(test_icu_casemap("en", 'f',
 141                                "A ReD fOx hunTS sQUirriLs",
 142                                "a red fox hunts squirrils"));
 143
 144     YAZ_CHECK(test_icu_casemap("en", 't',
 145                                "A ReD fOx hunTS sQUirriLs",
 146                                "A Red Fox Hunts Squirrils"));
 147
 148
 149     /* Locale 'da' */
 150
 151     /* success expected */
 152     YAZ_CHECK(test_icu_casemap("da", 'l',
 153                                "åh ÆbLE, øs fLØde i Åen efter bLåBærGRødeN",
 154                                "åh æble, øs fløde i åen efter blåbærgrøden"));
 155
 156     YAZ_CHECK(test_icu_casemap("da", 'u',
 157                                "åh ÆbLE, øs fLØde i Åen efter bLåBærGRødeN",
 158                                "ÅH ÆBLE, ØS FLØDE I ÅEN EFTER BLÅBÆRGRØDEN"));
 159
 160     YAZ_CHECK(test_icu_casemap("da", 'f',
 161                                "åh ÆbLE, øs fLØde i Åen efter bLåBærGRødeN",
 162                                "åh æble, øs fløde i åen efter blåbærgrøden"));
 163
 164     YAZ_CHECK(test_icu_casemap("da", 't',
 165                                "åh ÆbLE, øs fLØde i Åen efter bLåBærGRødeN",
 166                                "Åh Æble, Øs Fløde I Åen Efter Blåbærgrøden"));
 167
 168     /* Locale 'de' */
 169
 170     /* success expected */
 171     YAZ_CHECK(test_icu_casemap("de", 'l',
 172                                "zWÖlf ärgerliche Würste rollen ÜBer die StRAße",
 173                                "zwölf ärgerliche würste rollen über die straße"));
 174
 175     YAZ_CHECK(test_icu_casemap("de", 'u',
 176                                "zWÖlf ärgerliche Würste rollen ÜBer die StRAße",
 177                                "ZWÖLF ÄRGERLICHE WÜRSTE ROLLEN ÜBER DIE STRASSE"));
 178
 179     YAZ_CHECK(test_icu_casemap("de", 'f',
 180                                "zWÖlf ärgerliche Würste rollen ÜBer die StRAße",
 181                                "zwölf ärgerliche würste rollen über die strasse"));
 182
 183     YAZ_CHECK(test_icu_casemap("de", 't',
 184                                "zWÖlf ärgerliche Würste rollen ÜBer die StRAße",
 185                                "Zwölf Ärgerliche Würste Rollen Über Die Straße"));
 186
 187 }
 188
 189
 190 /* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
 191
 192 int test_icu_sortmap(const char * locale, int src_list_len,
 193                      const char ** src_list, const char ** chk_list)
 194 {
 195     int success = 1;
 196
 197     UErrorCode status = U_ZERO_ERROR;
 198
 199     struct icu_buf_utf8 * buf8 = icu_buf_utf8_create(0);
 200     struct icu_buf_utf16 * buf16 = icu_buf_utf16_create(0);
 201
 202     int i;
 203
 204     struct icu_termmap * list[src_list_len];
 205
 206     UCollator *coll = ucol_open(locale, &status);
 207     icu_check_status(status);
 208
 209     if(U_FAILURE(status))
 210         return 0;
 211
 212     /* assigning display terms and sort keys using buf 8 and buf16 */
 213     for( i = 0; i < src_list_len; i++)
 214         {
 215
 216             list[i] = (struct icu_termmap *) malloc(sizeof(struct icu_termmap));
 217
 218             /* copy display term */
 219             strcpy(list[i]->disp_term, src_list[i]);
 220
 221             /* transforming to UTF16 */
 222             icu_utf16_from_utf8_cstr(buf16, list[i]->disp_term, &status);
 223             icu_check_status(status);
 224
 225             /* computing sortkeys */
 226             icu_sortkey8_from_utf16(coll, buf8, buf16, &status);
 227             icu_check_status(status);
 228
 229             /* assigning sortkeys */
 230             memcpy(list[i]->sort_key, buf8->utf8, buf8->utf8_len);
 231         }
 232
 233
 234     /* do the sorting */
 235     qsort(list, src_list_len,
 236           sizeof(struct icu_termmap *), icu_termmap_cmp);
 237
 238     /* checking correct sorting */
 239     for (i = 0; i < src_list_len; i++){
 240         if (0 != strcmp(list[i]->disp_term, chk_list[i])){
 241             success = 0;
 242         }
 243     }
 244
 245     if(!success){
 246         printf("\nERROR\n");
 247         printf("Input str: '%s' : ", locale);
 248         for (i = 0; i < src_list_len; i++) {
 249             printf(" '%s'", list[i]->disp_term);
 250         }
 251         printf("\n");
 252         printf("ICU sort:  '%s' : ", locale);
 253         for (i = 0; i < src_list_len; i++) {
 254             printf(" '%s'", list[i]->disp_term);
 255         }
 256         printf("\n");
 257         printf("Expected:  '%s' : ", locale);
 258         for (i = 0; i < src_list_len; i++) {
 259             printf(" '%s'", chk_list[i]);
 260         }
 261         printf("\n");
 262     }
 263
 264
 265
 266     for( i = 0; i < src_list_len; i++)
 267         free(list[i]);
 268
 269
 270     ucol_close(coll);
 271
 272     icu_buf_utf8_destroy(buf8);
 273     icu_buf_utf16_destroy(buf16);
 274
 275     return success;
 276 }
 277
 278
 279 /* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
 280
 281 void test_icu_I18N_sortmap(int argc, char **argv)
 282 {
 283
 284     /* successful tests */
 285     size_t en_1_len = 6;
 286     const char * en_1_src[6] = {"z", "K", "a", "A", "Z", "k"};
 287     const char * en_1_cck[6] = {"a", "A", "k", "K", "z", "Z"};
 288     YAZ_CHECK(test_icu_sortmap("en", en_1_len, en_1_src, en_1_cck));
 289     YAZ_CHECK(test_icu_sortmap("en_AU", en_1_len, en_1_src, en_1_cck));
 290     YAZ_CHECK(test_icu_sortmap("en_CA", en_1_len, en_1_src, en_1_cck));
 291     YAZ_CHECK(test_icu_sortmap("en_GB", en_1_len, en_1_src, en_1_cck));
 292     YAZ_CHECK(test_icu_sortmap("en_US", en_1_len, en_1_src, en_1_cck));
 293
 294     /* successful tests */
 295     {
 296         size_t da_1_len = 6;
 297         const char * da_1_src[6] = {"z", "å", "o", "æ", "a", "ø"};
 298         const char * da_1_cck[6] = {"a", "o", "z", "æ", "ø", "å"};
 299         YAZ_CHECK(test_icu_sortmap("da", da_1_len, da_1_src, da_1_cck));
 300         YAZ_CHECK(test_icu_sortmap("da_DK", da_1_len, da_1_src, da_1_cck));
 301     }
 302     /* successful tests */
 303     {
 304         size_t de_1_len = 9;
 305         const char * de_1_src[9] = {"u", "ä", "o", "t", "s", "ß", "ü", "ö", "a"};
 306         const char * de_1_cck[9] = {"a","ä", "o", "ö", "s", "ß", "t", "u", "ü"};
 307         YAZ_CHECK(test_icu_sortmap("de", de_1_len, de_1_src, de_1_cck));
 308         YAZ_CHECK(test_icu_sortmap("de_AT", de_1_len, de_1_src, de_1_cck));
 309         YAZ_CHECK(test_icu_sortmap("de_DE", de_1_len, de_1_src, de_1_cck));
 310     }
 311
 312 }
 313
 314
 315 /* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
 316
 317
 318
 319
 320 int test_icu_normalizer(const char * rules8cstr,
 321                             const char * src8cstr,
 322                             const char * chk8cstr)
 323 {
 324     int success = 0;
 325
 326     UErrorCode status = U_ZERO_ERROR;
 327
 328     struct icu_buf_utf16 * src16 = icu_buf_utf16_create(0);
 329     struct icu_buf_utf16 * dest16 = icu_buf_utf16_create(0);
 330     struct icu_buf_utf8 * dest8 = icu_buf_utf8_create(0);
 331     struct icu_normalizer * normalizer
 332         = icu_normalizer_create(rules8cstr, 'f', &status);
 333     icu_check_status(status);
 334
 335     icu_utf16_from_utf8_cstr(src16, src8cstr, &status);
 336     icu_check_status(status);
 337
 338     icu_normalizer_normalize(normalizer, dest16, src16, &status);
 339     icu_check_status(status);
 340
 341     icu_utf16_to_utf8(dest8, dest16, &status);
 342     icu_check_status(status);
 343
 344
 345     if(!strcmp((const char *) dest8->utf8,
 346                (const char *) chk8cstr))
 347         success = 1;
 348     else {
 349         success = 0;
 350         printf("Normalization\n");
 351         printf("Rules:      '%s'\n", rules8cstr);
 352         printf("Input:      '%s'\n", src8cstr);
 353         printf("Normalized: '%s'\n", dest8->utf8);
 354         printf("Expected:   '%s'\n", chk8cstr);
 355     }
 356
 357
 358     icu_normalizer_destroy(normalizer);
 359     icu_buf_utf16_destroy(src16);
 360     icu_buf_utf16_destroy(dest16);
 361     icu_buf_utf8_destroy(dest8);
 362
 363     return success;
 364 };
 365
 366
 367 /* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
 368
 369 void test_icu_I18N_normalizer(int argc, char **argv)
 370 {
 371
 372     YAZ_CHECK(test_icu_normalizer("[:Punctuation:] Any-Remove",
 373                                   "Don't shoot!",
 374                                   "Dont shoot"));
 375
 376     YAZ_CHECK(test_icu_normalizer("[:Control:] Any-Remove",
 377                                   "Don't\n shoot!",
 378                                   "Don't shoot!"));
 379
 380     YAZ_CHECK(test_icu_normalizer("[:Decimal_Number:] Any-Remove",
 381                                   "This is 4 you!",
 382                                   "This is  you!"));
 383
 384     YAZ_CHECK(test_icu_normalizer("Lower; [:^Letter:] Remove",
 385                                   "Don't shoot!",
 386                                   "dontshoot"));
 387
 388     YAZ_CHECK(test_icu_normalizer("[:^Number:] Remove",
 389                                   "Monday 15th of April",
 390                                   "15"));
 391
 392     YAZ_CHECK(test_icu_normalizer("Lower;"
 393                                   "[[:WhiteSpace:][:Punctuation:]] Remove",
 394                                   " word4you? ",
 395                                   "word4you"));
 396
 397
 398     YAZ_CHECK(test_icu_normalizer("NFD; [:Nonspacing Mark:] Remove; NFC",
 399                                   "à côté de l'alcôve ovoïde",
 400                                   "a cote de l'alcove ovoide"));
 401
 402 }
 403
 404
 405 /* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
 406
 407 int test_icu_tokenizer(const char * locale, char action,
 408                      const char * src8cstr, int count)
 409 {
 410     int success = 1;
 411
 412     UErrorCode status = U_ZERO_ERROR;
 413     struct icu_buf_utf16 * src16 = icu_buf_utf16_create(0);
 414     struct icu_buf_utf16 * tkn16 = icu_buf_utf16_create(0);
 415     struct icu_buf_utf8 * tkn8 = icu_buf_utf8_create(0);
 416     struct icu_tokenizer * tokenizer = 0;
 417
 418     /* transforming to UTF16 */
 419     icu_utf16_from_utf8_cstr(src16, src8cstr, &status);
 420     icu_check_status(status);
 421
 422     /* set up tokenizer */
 423     tokenizer = icu_tokenizer_create(locale, action, &status);
 424     icu_check_status(status);
 425     YAZ_CHECK(tokenizer);
 426
 427     /* attach text buffer to tokenizer */
 428     icu_tokenizer_attach(tokenizer, src16, &status);
 429     icu_check_status(status);
 430     YAZ_CHECK(tokenizer->bi);
 431
 432     /* perform work on tokens */
 433     while(icu_tokenizer_next_token(tokenizer, tkn16, &status)){
 434         icu_check_status(status);
 435
 436         /* converting to UTF8 */
 437         icu_utf16_to_utf8(tkn8, tkn16, &status);
 438     }
 439
 440     if (count != icu_tokenizer_token_count(tokenizer)){
 441         success = 0;
 442         printf("\nTokenizer '%s:%c' Error: \n", locale, action);
 443         printf("Input:  '%s'\n", src8cstr);
 444         printf("Tokens: %d", icu_tokenizer_token_count(tokenizer));
 445         printf(", expected: %d\n", count);
 446     }
 447
 448     icu_tokenizer_destroy(tokenizer);
 449     icu_buf_utf16_destroy(src16);
 450     icu_buf_utf16_destroy(tkn16);
 451     icu_buf_utf8_destroy(tkn8);
 452
 453     return success;
 454 }
 455
 456
 457 /* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
 458
 459 void test_icu_I18N_tokenizer(int argc, char **argv)
 460 {
 461
 462
 463     const char * en_str
 464         = "O Romeo, Romeo! wherefore art thou Romeo?";
 465
 466     YAZ_CHECK(test_icu_tokenizer("en", 's', en_str, 2));
 467     YAZ_CHECK(test_icu_tokenizer("en", 'l', en_str, 7));
 468     YAZ_CHECK(test_icu_tokenizer("en", 'w', en_str, 16));
 469     YAZ_CHECK(test_icu_tokenizer("en", 'c', en_str, 41));
 470
 471
 472
 473     {
 474         const char * da_str
 475             = "Blåbærtærte. Denne kage stammer fra Finland. "
 476             "Den er med blåbær, men alle sommerens forskellige bær kan bruges.";
 477
 478         YAZ_CHECK(test_icu_tokenizer("da", 's', da_str, 3));
 479         YAZ_CHECK(test_icu_tokenizer("dar", 'l', da_str, 17));
 480         YAZ_CHECK(test_icu_tokenizer("da", 'w', da_str, 37));
 481         YAZ_CHECK(test_icu_tokenizer("da", 'c', da_str, 110));
 482     }
 483
 484 }
 485
 486
 487 void test_icu_I18N_chain(int argc, char **argv)
 488 {
 489     const char * en_str
 490         = "O Romeo, Romeo! wherefore art thou\t Romeo?";
 491
 492     UErrorCode status = U_ZERO_ERROR;
 493     struct icu_chain * chain = 0;
 494
 495
 496     const char * xml_str = "<icu>"
 497         "<normalize rule=\"[:Control:] Any-Remove\"/>"
 498         "<tokenize rule=\"l\"/>"
 499         "<normalize rule=\"[[:WhiteSpace:][:Punctuation:]] Remove\"/>"
 500         "<display/>"
 501         "<casemap rule=\"l\"/>"
 502         "</icu>";
 503
 504
 505     xmlDoc *doc = xmlParseMemory(xml_str, strlen(xml_str));
 506     xmlNode *xml_node = xmlDocGetRootElement(doc);
 507     YAZ_CHECK(xml_node);
 508
 509     // printf("ICU chain:\ninput: '%s'\n", en_str);
 510
 511
 512     chain = icu_chain_xml_config(xml_node, (uint8_t *) "en", 0, &status);
 513
 514     xmlFreeDoc(doc);
 515     YAZ_CHECK(chain);
 516
 517     YAZ_CHECK(icu_chain_assign_cstr(chain, en_str, &status));
 518
 519     while (icu_chain_next_token(chain, &status)){
 520         ;
 521         // printf("%d '%s' '%s'\n",
 522         //       icu_chain_get_token_count(chain),
 523         //       icu_chain_get_norm(chain),
 524         //       icu_chain_get_display(chain));
 525     }
 526
 527     YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 7);
 528
 529
 530     YAZ_CHECK(icu_chain_assign_cstr(chain, "what is this?", &status));
 531
 532     while (icu_chain_next_token(chain, &status)){
 533         ;
 534         //printf("%d '%s' '%s'\n",
 535         //       icu_chain_get_token_count(chain),
 536         //       icu_chain_get_norm(chain),
 537         //       icu_chain_get_display(chain));
 538     }
 539
 540
 541     YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 3);
 542
 543     icu_chain_destroy(chain);
 544 }
 545
 546
 547 void test_bug_1140(void)
 548 {
 549     UErrorCode status = U_ZERO_ERROR;
 550     struct icu_chain * chain = 0;
 551
 552     const char * xml_str = "<icu>"
 553
 554         /* if the first rule is normalize instead. Then it works */
 555 #if 0
 556         "<normalize rule=\"[:Control:] Any-Remove\"/>"
 557 #endif
 558         "<tokenize rule=\"l\"/>"
 559         "<normalize rule=\"[[:WhiteSpace:][:Punctuation:]] Remove\"/>"
 560         "<display/>"
 561         "<casemap rule=\"l\"/>"
 562         "</icu>";
 563
 564
 565     xmlDoc *doc = xmlParseMemory(xml_str, strlen(xml_str));
 566     xmlNode *xml_node = xmlDocGetRootElement(doc);
 567     YAZ_CHECK(xml_node);
 568
 569     chain = icu_chain_xml_config(xml_node, (uint8_t *) "en", 0, &status);
 570
 571     xmlFreeDoc(doc);
 572     YAZ_CHECK(chain);
 573
 574     YAZ_CHECK(icu_chain_assign_cstr(
 575                   chain,  "O Romeo, Romeo! wherefore art thou\t Romeo?",
 576                   &status));
 577
 578     while (icu_chain_next_token(chain, &status)){
 579         ;
 580         //printf("%d '%s' '%s'\n",
 581         //       icu_chain_get_token_count(chain),
 582         //       icu_chain_get_norm(chain),
 583         //       icu_chain_get_display(chain));
 584
 585     }
 586
 587
 588     YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 7);
 589
 590     YAZ_CHECK(icu_chain_assign_cstr(chain, "what is this?", &status));
 591
 592     while (icu_chain_next_token(chain, &status)){
 593        ;
 594        //printf("%d '%s' '%s'\n",
 595        //        icu_chain_get_token_count(chain),
 596        //        icu_chain_get_norm(chain),
 597        //        icu_chain_get_display(chain));
 598     }
 599
 600     /* we expect 'what' 'is' 'this', i.e. 3 tokens */
 601     YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 3);
 602
 603     icu_chain_destroy(chain);
 604 }
 605
 606
 607
 608 void test_chain_empty_token(void)
 609 {
 610     UErrorCode status = U_ZERO_ERROR;
 611     struct icu_chain * chain = 0;
 612
 613     const char * xml_str = "<icu>"
 614         "<tokenize rule=\"w\"/>"
 615         "<normalize rule=\"[[:WhiteSpace:][:Punctuation:]] Remove\"/>"
 616         "</icu>";
 617
 618     xmlDoc *doc = xmlParseMemory(xml_str, strlen(xml_str));
 619     xmlNode *xml_node = xmlDocGetRootElement(doc);
 620     YAZ_CHECK(xml_node);
 621
 622     chain = icu_chain_xml_config(xml_node, (uint8_t *) "en", 0, &status);
 623
 624     xmlFreeDoc(doc);
 625     YAZ_CHECK(chain);
 626
 627     YAZ_CHECK(icu_chain_assign_cstr(
 628                   chain,  "a string with 15 wordbreaks and 8 tokens",
 629                   &status));
 630
 631     while (icu_chain_next_token(chain, &status)){
 632         ;
 633         //printf("%d '%s' '%s'\n",
 634         //       icu_chain_get_token_count(chain),
 635         //       icu_chain_get_norm(chain),
 636         //       icu_chain_get_display(chain));
 637     }
 638
 639     YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 8);
 640
 641     icu_chain_destroy(chain);
 642 }
 643
 644 #endif // HAVE_ICU
 645
 646 /* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
 647
 648 int main(int argc, char **argv)
 649 {
 650
 651     YAZ_CHECK_INIT(argc, argv);
 652     YAZ_CHECK_LOG();
 653
 654 #if HAVE_ICU
 655
 656     test_icu_I18N_casemap(argc, argv);
 657     test_icu_I18N_sortmap(argc, argv);
 658     test_icu_I18N_normalizer(argc, argv);
 659     test_icu_I18N_tokenizer(argc, argv);
 660     test_icu_I18N_chain(argc, argv);
 661     test_chain_empty_token();
 662     test_bug_1140();
 663
 664 #else /* HAVE_ICU */
 665
 666     printf("ICU unit tests omitted.\n"
 667            "Please install libicu36-dev and icu-doc or similar\n");
 668     YAZ_CHECK(0 == 0);
 669
 670 #endif /* HAVE_ICU */
 671
 672     YAZ_CHECK_TERM;
 673 }
 674
 675
 676 /* DO NOT EDIT THIS FILE IF YOUR EDITOR DOES NOT SUPPORT UTF-8 */
 677
 678
 679
 680 /*
 681  * Local variables:
 682  * c-basic-offset: 4
 683  * indent-tabs-mode: nil
 684  * End:
 685  * vim: shiftwidth=4 tabstop=8 expandtab
 686  */