// Build command (Debian 64-bit, testing distribution):
//   gcc -g -Wall `icu-config --cppflags` `icu-config --ldflags` -o icu_bug_2 icu_bug_2.c
// Adapted from http://www.icu-project.org/userguide/Collate_API.html
// A struct icu_termmap was added so that the sorted output can actually be inspected.
13 #include <unicode/ustring.h> /* some more string fcns*/
14 #include <unicode/uchar.h> /* char names */
17 //#include <unicode/ustdio.h>
18 //#include <unicode/utypes.h> /* Basic ICU data types */
19 #include <unicode/ucol.h>
20 //#include <unicode/ucnv.h> /* C Converter API */
21 //#include <unicode/uloc.h>
22 //#include <unicode/ubrk.h>
23 //#include <unicode/unistr.h>
26 #define MAX_KEY_SIZE 256
36 struct icu_buf_utf16 * icu_buf_utf16_create(size_t capacity)
38 struct icu_buf_utf16 * buf16
39 = (struct icu_buf_utf16 *) malloc(sizeof(struct icu_buf_utf16));
46 buf16->utf16 = (UChar *) malloc(sizeof(UChar) * capacity);
47 buf16->utf16[0] = (UChar) 0;
48 buf16->utf16_cap = capacity;
54 struct icu_buf_utf16 * icu_buf_utf16_resize(struct icu_buf_utf16 * buf16,
57 //printf("buf16 resize: %d\n", (int)capacity);
60 if (0 == buf16->utf16)
61 buf16->utf16 = (UChar *) malloc(sizeof(UChar) * capacity);
64 = (UChar *) realloc(buf16->utf16, sizeof(UChar) * capacity);
65 buf16->utf16[0] = (UChar) 0;
67 buf16->utf16_cap = capacity;
// NOTE(review): only the signature of this function is visible here. By its
// name it presumably releases buf16 together with its utf16 storage --
// confirm against the full definition.
82 void icu_buf_utf16_destroy(struct icu_buf_utf16 * buf16)
102 struct icu_buf_utf8 * icu_buf_utf8_create(size_t capacity)
104 struct icu_buf_utf8 * buf8
105 = (struct icu_buf_utf8 *) malloc(sizeof(struct icu_buf_utf8));
112 buf8->utf8 = (uint8_t *) malloc(sizeof(uint8_t) * capacity);
113 buf8->utf8[0] = (uint8_t) 0;
114 buf8->utf8_cap = capacity;
121 struct icu_buf_utf8 * icu_buf_utf8_resize(struct icu_buf_utf8 * buf8,
124 //printf("buf8 resize: %d\n", (int)capacity);
128 buf8->utf8 = (uint8_t *) malloc(sizeof(uint8_t) * capacity);
131 = (uint8_t *) realloc(buf8->utf8, sizeof(uint8_t) * capacity);
132 buf8->utf8[0] = (uint8_t) 0;
134 buf8->utf8_cap = capacity;
// NOTE(review): only the signature of this function is visible here. By its
// name it presumably releases buf8 together with its utf8 storage --
// confirm against the full definition.
150 void icu_buf_utf8_destroy(struct icu_buf_utf8 * buf8)
161 UErrorCode icu_utf16_from_utf8(struct icu_buf_utf16 * dest16,
162 struct icu_buf_utf8 * src8,
165 //if(!U_SUCCESS(*status))
167 printf("icu_utf16_from_utf8 working - needs correcting, see icu_utf16_from_utf8_cstr\n");
169 u_strFromUTF8(dest16->utf16, dest16->utf16_cap, &(dest16->utf16_len),
170 (const char *) src8->utf8, src8->utf8_len, status);
172 // check for buffer overflow, resize and retry
173 if (dest16->utf16_len > dest16->utf16_cap){
174 printf("icu_utf16_from_utf8 need resize\n");
175 icu_buf_utf16_resize(dest16, dest16->utf16_len * 2);
176 *status = U_ZERO_ERROR;
177 u_strFromUTF8(dest16->utf16, dest16->utf16_cap, &(dest16->utf16_len),
178 (const char*) src8->utf8, src8->utf8_len, status);
186 UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 * dest16,
187 const char * src8cstr,
190 size_t src8cstr_len = 0;
191 int32_t utf16_len = 0;
193 //if(!U_SUCCESS(status))
196 //printf("icu_utf16_from_utf8_cstr working\n");
197 src8cstr_len = strlen(src8cstr);
199 u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
201 //&(dest16->utf16_len),
202 src8cstr, src8cstr_len, status);
204 // check for buffer overflow, resize and retry
205 if (*status == U_BUFFER_OVERFLOW_ERROR
206 //|| dest16->utf16_len > dest16->utf16_cap
208 //printf("icu_utf16_from_utf8_cstr need resize\n");
209 icu_buf_utf16_resize(dest16, utf16_len * 2);
210 *status = U_ZERO_ERROR;
211 u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
213 //&(dest16->utf16_len),
214 src8cstr, src8cstr_len, status);
217 if (*status != U_BUFFER_OVERFLOW_ERROR
218 && utf16_len < dest16->utf16_cap)
219 dest16->utf16_len = utf16_len;
221 dest16->utf16[0] = (UChar) 0;
222 dest16->utf16_len = 0;
229 UErrorCode icu_sortkey8_from_utf16(UCollator *coll,
230 struct icu_buf_utf8 * dest8,
231 struct icu_buf_utf16 * src16,
235 int32_t sortkey_len = 0;
236 //if(!U_SUCCESS(status))
239 //printf("icu_sortkey8_from_utf16 working\n");
240 sortkey_len = ucol_getSortKey(coll, src16->utf16, src16->utf16_len,
241 dest8->utf8, dest8->utf8_cap);
243 // check for buffer overflow, resize and retry
244 if (sortkey_len > dest8->utf8_cap) {
245 //printf("icu_sortkey8_from_utf16 need resize\n");
246 icu_buf_utf8_resize(dest8, sortkey_len * 2);
247 sortkey_len = ucol_getSortKey(coll, src16->utf16, src16->utf16_len,
248 dest8->utf8, dest8->utf8_cap);
// One term prepared for sorting: its collation sort key and its display form.
// NOTE(review): the enclosing struct header is not visible here; the uses
// elsewhere in this file suggest these are the fields of struct icu_termmap.
259 uint8_t sort_key[MAX_KEY_SIZE]; // sort key bytes, '\0'-terminated; compared with strcmp()
260 char disp_term[MAX_KEY_SIZE]; // term as a '\0'-terminated UTF-8 C string; printed with %s
265 int icu_termmap_cmp(const void *vp1, const void *vp2)
267 struct icu_termmap *itmp1 = *(struct icu_termmap **) vp1;
268 struct icu_termmap *itmp2 = *(struct icu_termmap **) vp2;
272 cmp = strcmp((const char *)itmp1->sort_key,
273 (const char *)itmp2->sort_key);
// Prints the numeric value and the ICU name of a status code to stdout when
// the status is not a success.
// status: ICU status code to inspect.
// NOTE(review): the return statement is not visible here and the callers in
// this file ignore the result -- confirm what the int return value means.
278 int icu_check_status(UErrorCode status)
// Per the ICU documentation U_SUCCESS() also accepts warning codes, so
// warnings are presumably not printed -- confirm against utypes.h.
280 if(!U_SUCCESS(status))
281 printf("ICU status: %d %s\n", status, u_errorName(status));
287 int icu_coll_sort(const char * locale, int src_list_len,
288 const char ** src_list, const char ** chk_list)
290 UErrorCode status = U_ZERO_ERROR;
292 struct icu_buf_utf8 * buf8 = icu_buf_utf8_create(0);
293 struct icu_buf_utf16 * buf16 = icu_buf_utf16_create(0);
297 struct icu_termmap * list[src_list_len];
299 UCollator *coll = ucol_open(locale, &status);
300 icu_check_status(status);
302 if(!U_SUCCESS(status))
305 // assigning display terms and sort keys using buf 8 and buf16
306 for( i = 0; i < src_list_len; i++)
309 list[i] = (struct icu_termmap *) malloc(sizeof(struct icu_termmap));
312 strcpy(list[i]->disp_term, src_list[i]);
314 // transforming to UTF16
315 icu_utf16_from_utf8_cstr(buf16, list[i]->disp_term, &status);
316 icu_check_status(status);
318 // computing sortkeys
319 icu_sortkey8_from_utf16(coll, buf8, buf16, &status);
320 icu_check_status(status);
322 // assigning sortkeys
323 memcpy(list[i]->sort_key, buf8->utf8, buf8->utf8_len);
327 printf("Input str: '%s' : ", locale);
328 for (i = 0; i < src_list_len; i++) {
329 printf(" '%s'", list[i]->disp_term);
334 qsort(list, src_list_len,
335 sizeof(struct icu_termmap *), icu_termmap_cmp);
338 printf("ICU sort: '%s' : ", locale);
339 for (i = 0; i < src_list_len; i++) {
340 printf(" '%s'", list[i]->disp_term);
341 printf("(%d|%d)", list[i]->sort_key[0],list[i]->sort_key[1]);
347 icu_buf_utf8_destroy(buf8);
348 icu_buf_utf16_destroy(buf16);
/**
 * Runs icu_coll_sort() over three small term lists (English, Danish,
 * German), once for each locale variant of the respective language.
 *
 * The *_cck arrays carry the expected order and are passed along as the
 * check list. The values returned by icu_coll_sort() are not examined.
 */
int main(int argc, char **argv)
{
    static const char *const en_locales[] = {
        "en", "en_AU", "en_CA", "en_GB", "en_US"
    };
    static const char *const da_locales[] = { "da", "da_DK" };
    static const char *const de_locales[] = { "de", "de_AT", "de_DE" };

    const char * en_1_src[6] = {"z", "K", "a", "A", "Z", "k"};
    const char * en_1_cck[6] = {"a", "A", "K", "k", "z", "Z"};

    const char * da_1_src[6] = {"z", "å", "o", "æ", "a", "ø"};
    const char * da_1_cck[6] = {"a", "o", "z", "æ", "ø", "å"};

    const char * de_1_src[9] = {"u", "ä", "o", "t", "s", "ß", "ü", "ö", "a"};
    const char * de_1_cck[9] = {"ä", "a", "o", "ö", "s", "ß", "t", "u", "ü"};

    /* element counts are derived from the arrays themselves */
    const int en_1_len = (int) (sizeof en_1_src / sizeof en_1_src[0]);
    const int da_1_len = (int) (sizeof da_1_src / sizeof da_1_src[0]);
    const int de_1_len = (int) (sizeof de_1_src / sizeof de_1_src[0]);

    size_t k;

    for (k = 0; k < sizeof en_locales / sizeof en_locales[0]; k++)
        icu_coll_sort(en_locales[k], en_1_len, en_1_src, en_1_cck);

    for (k = 0; k < sizeof da_locales / sizeof da_locales[0]; k++)
        icu_coll_sort(da_locales[k], da_1_len, da_1_src, da_1_cck);

    for (k = 0; k < sizeof de_locales / sizeof de_locales[0]; k++)
        icu_coll_sort(de_locales[k], de_1_len, de_1_src, de_1_cck);

    return 0;
}