From: Marc Cromme Date: Mon, 7 May 2007 13:08:26 +0000 (+0000) Subject: remove now unnecessary ICU bug experimenting file, useful content moved into icu_I18N.c X-Git-Tag: PAZPAR2.1.0.0~169 X-Git-Url: http://lists.indexdata.dk/?a=commitdiff_plain;h=88f57e92f60b6c789fb9a6742edc117e5b5f895f;p=pazpar2-moved-to-github.git remove now unnecessary ICU bug experimenting file, useful content moved into icu_I18N.c --- diff --git a/src/icu_bug_2.c b/src/icu_bug_2.c deleted file mode 100644 index 6673270..0000000 --- a/src/icu_bug_2.c +++ /dev/null @@ -1,399 +0,0 @@ -// Make command on debian 64 bit testing dist -/* -gcc -g -Wall `icu-config --cppflags` `icu-config --ldflags` -o icu_bug_2 icu_bug_2.c -snatched from http://www.icu-project.org/userguide/Collate_API.html -and changed. -added a struct icu_termmap such that I actually can see the output -*/ - -#include -#include -#include - -#include /* some more string fcns*/ -#include /* char names */ - - -//#include -//#include /* Basic ICU data types */ -#include -//#include /* C Converter API */ -//#include -//#include -//#include - - -#define MAX_KEY_SIZE 256 - -struct icu_buf_utf16 -{ - UChar * utf16; - int32_t utf16_len; - int32_t utf16_cap; -}; - - -struct icu_buf_utf16 * icu_buf_utf16_create(size_t capacity) -{ - struct icu_buf_utf16 * buf16 - = (struct icu_buf_utf16 *) malloc(sizeof(struct icu_buf_utf16)); - - buf16->utf16 = 0; - buf16->utf16_len = 0; - buf16->utf16_cap = 0; - - if (capacity > 0){ - buf16->utf16 = (UChar *) malloc(sizeof(UChar) * capacity); - buf16->utf16[0] = (UChar) 0; - buf16->utf16_cap = capacity; - } - return buf16; -}; - - -struct icu_buf_utf16 * icu_buf_utf16_resize(struct icu_buf_utf16 * buf16, - size_t capacity) -{ - if (buf16){ - if (capacity > 0){ - if (0 == buf16->utf16) - buf16->utf16 = (UChar *) malloc(sizeof(UChar) * capacity); - else - buf16->utf16 - = (UChar *) realloc(buf16->utf16, sizeof(UChar) * capacity); - buf16->utf16[0] = (UChar) 0; - buf16->utf16_len = 0; - buf16->utf16_cap = capacity; - } - else { - if (buf16->utf16) - free(buf16->utf16); - buf16->utf16 = 0; - buf16->utf16_len = 0; - buf16->utf16_cap = 0; - } - } - - return buf16; -}; - - -void icu_buf_utf16_destroy(struct icu_buf_utf16 * buf16) -{ - if (buf16){ - if (buf16->utf16) - free(buf16->utf16); - free(buf16); - } -}; - - - -struct icu_buf_utf8 -{ - uint8_t * utf8; - int32_t utf8_len; - int32_t utf8_cap; -}; - - - -struct icu_buf_utf8 * icu_buf_utf8_create(size_t capacity) -{ - struct icu_buf_utf8 * buf8 - = (struct icu_buf_utf8 *) malloc(sizeof(struct icu_buf_utf8)); - - buf8->utf8 = 0; - buf8->utf8_len = 0; - buf8->utf8_cap = 0; - - if (capacity > 0){ - buf8->utf8 = (uint8_t *) malloc(sizeof(uint8_t) * capacity); - buf8->utf8[0] = (uint8_t) 0; - buf8->utf8_cap = capacity; - } - return buf8; -}; - - - -struct icu_buf_utf8 * icu_buf_utf8_resize(struct icu_buf_utf8 * buf8, - size_t capacity) -{ - if (buf8){ - if (capacity > 0){ - if (0 == buf8->utf8) - buf8->utf8 = (uint8_t *) malloc(sizeof(uint8_t) * capacity); - else - buf8->utf8 - = (uint8_t *) realloc(buf8->utf8, sizeof(uint8_t) * capacity); - buf8->utf8[0] = (uint8_t) 0; - buf8->utf8_len = 0; - buf8->utf8_cap = capacity; - } - else { - if (buf8->utf8) - free(buf8->utf8); - buf8->utf8 = 0; - buf8->utf8_len = 0; - buf8->utf8_cap = 0; - } - } - - return buf8; -}; - - - -void icu_buf_utf8_destroy(struct icu_buf_utf8 * buf8) -{ - if (buf8){ - if (buf8->utf8) - free(buf8->utf8); - free(buf8); - } -}; - - - -UErrorCode icu_utf16_from_utf8(struct icu_buf_utf16 * dest16, - struct icu_buf_utf8 * src8, - UErrorCode * status) -{ - int32_t utf16_len = 0; - - u_strFromUTF8(dest16->utf16, dest16->utf16_cap, - &utf16_len, - (const char *) src8->utf8, src8->utf8_len, status); - - // check for buffer overflow, resize and retry - if (*status == U_BUFFER_OVERFLOW_ERROR - //|| dest16->utf16_len > dest16->utf16_cap - ){ - icu_buf_utf16_resize(dest16, utf16_len * 2); - *status = U_ZERO_ERROR; - u_strFromUTF8(dest16->utf16, dest16->utf16_cap, - &utf16_len, - (const char *) src8->utf8, src8->utf8_len, status); - } - - if (*status != U_BUFFER_OVERFLOW_ERROR - && utf16_len < dest16->utf16_cap) - dest16->utf16_len = utf16_len; - else { - dest16->utf16[0] = (UChar) 0; - dest16->utf16_len = 0; - } - - return *status; -}; - - - -UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 * dest16, - const char * src8cstr, - UErrorCode * status) -{ - size_t src8cstr_len = 0; - int32_t utf16_len = 0; - - src8cstr_len = strlen(src8cstr); - - u_strFromUTF8(dest16->utf16, dest16->utf16_cap, - &utf16_len, - src8cstr, src8cstr_len, status); - - // check for buffer overflow, resize and retry - if (*status == U_BUFFER_OVERFLOW_ERROR - //|| dest16->utf16_len > dest16->utf16_cap - ){ - icu_buf_utf16_resize(dest16, utf16_len * 2); - *status = U_ZERO_ERROR; - u_strFromUTF8(dest16->utf16, dest16->utf16_cap, - &utf16_len, - src8cstr, src8cstr_len, status); - } - - if (*status != U_BUFFER_OVERFLOW_ERROR - && utf16_len < dest16->utf16_cap) - dest16->utf16_len = utf16_len; - else { - dest16->utf16[0] = (UChar) 0; - dest16->utf16_len = 0; - } - - return *status; -}; - - -UErrorCode icu_sortkey8_from_utf16(UCollator *coll, - struct icu_buf_utf8 * dest8, - struct icu_buf_utf16 * src16, - UErrorCode * status) -{ - - int32_t sortkey_len = 0; - - sortkey_len = ucol_getSortKey(coll, src16->utf16, src16->utf16_len, - dest8->utf8, dest8->utf8_cap); - - // check for buffer overflow, resize and retry - if (sortkey_len > dest8->utf8_cap) { - icu_buf_utf8_resize(dest8, sortkey_len * 2); - sortkey_len = ucol_getSortKey(coll, src16->utf16, src16->utf16_len, - dest8->utf8, dest8->utf8_cap); - } - - if (sortkey_len > 0) - dest8->utf8_len = sortkey_len; - - return *status; -}; - - - - -struct icu_termmap -{ - uint8_t sort_key[MAX_KEY_SIZE]; // standard C string '\0' terminated - char disp_term[MAX_KEY_SIZE]; // standard C utf-8 string -}; - - - -int icu_termmap_cmp(const void *vp1, const void *vp2) -{ - struct icu_termmap *itmp1 = *(struct icu_termmap **) vp1; - struct icu_termmap *itmp2 = *(struct icu_termmap **) vp2; - - int cmp = 0; - - cmp = strcmp((const char *)itmp1->sort_key, - (const char *)itmp2->sort_key); - return cmp; -} - - -int icu_check_status(UErrorCode status) -{ - if(!U_SUCCESS(status)) - printf("ICU status: %d %s\n", status, u_errorName(status)); - return status; -} - - - -int icu_coll_sort(const char * locale, int src_list_len, - const char ** src_list, const char ** chk_list) -{ - UErrorCode status = U_ZERO_ERROR; - int success = 1; - - struct icu_buf_utf8 * buf8 = icu_buf_utf8_create(0); - struct icu_buf_utf16 * buf16 = icu_buf_utf16_create(0); - - int i; - - struct icu_termmap * list[src_list_len]; - - UCollator *coll = ucol_open(locale, &status); - icu_check_status(status); - - if(!U_SUCCESS(status)) - return 0; - - // assigning display terms and sort keys using buf 8 and buf16 - for( i = 0; i < src_list_len; i++) - { - - list[i] = (struct icu_termmap *) malloc(sizeof(struct icu_termmap)); - - // copy display term - strcpy(list[i]->disp_term, src_list[i]); - - // transforming to UTF16 - icu_utf16_from_utf8_cstr(buf16, list[i]->disp_term, &status); - icu_check_status(status); - - // computing sortkeys - icu_sortkey8_from_utf16(coll, buf8, buf16, &status); - icu_check_status(status); - - // assigning sortkeys - memcpy(list[i]->sort_key, buf8->utf8, buf8->utf8_len); - //strncpy(list[i]->sort_key, buf8->utf8, buf8->utf8_len); - //strcpy((char *) list[i]->sort_key, (const char *) buf8->utf8); - } - - - // do the sorting - qsort(list, src_list_len, - sizeof(struct icu_termmap *), icu_termmap_cmp); - - // checking correct sorting - for (i = 0; i < src_list_len; i++){ - if (0 != strcmp(list[i]->disp_term, chk_list[i])){ - success = 0; - } - } - - if(!success){ - printf("\nERROR\n"); - printf("Input str: '%s' : ", locale); - for (i = 0; i < src_list_len; i++) { - printf(" '%s'", list[i]->disp_term); - } - printf("\n"); - printf("ICU sort: '%s' : ", locale); - for (i = 0; i < src_list_len; i++) { - printf(" '%s'", list[i]->disp_term); - //printf("(%d|%d)", list[i]->sort_key[0],list[i]->sort_key[1]); - } - printf("\n"); - printf("Expected: '%s' : ", locale); - for (i = 0; i < src_list_len; i++) { - printf(" '%s'", chk_list[i]); - } - printf("\n"); - } - - - ucol_close(coll); - - icu_buf_utf8_destroy(buf8); - icu_buf_utf16_destroy(buf16); - - return success; -}; - - -int main(int argc, char **argv) -{ - - size_t en_1_len = 6; - const char * en_1_src[6] = {"z", "K", "a", "A", "Z", "k"}; - const char * en_1_cck[6] = {"a", "A", "k", "K", "z", "Z"}; - icu_coll_sort("en", en_1_len, en_1_src, en_1_cck); - icu_coll_sort("en_AU", en_1_len, en_1_src, en_1_cck); - icu_coll_sort("en_CA", en_1_len, en_1_src, en_1_cck); - icu_coll_sort("en_GB", en_1_len, en_1_src, en_1_cck); - icu_coll_sort("en_US", en_1_len, en_1_src, en_1_cck); - - - size_t da_1_len = 6; - const char * da_1_src[6] = {"z", "å", "o", "æ", "a", "ø"}; - const char * da_1_cck[6] = {"a", "o", "z", "æ", "ø", "å"}; - icu_coll_sort("da", da_1_len, da_1_src, da_1_cck); - icu_coll_sort("da_DK", da_1_len, da_1_src, da_1_cck); - - - size_t de_1_len = 9; - const char * de_1_src[9] = {"u", "ä", "o", "t", "s", "ß", "ü", "ö", "a"}; - const char * de_1_cck[9] = {"a", "ä", "o", "ö", "s", "ß", "t", "u", "ü"}; - icu_coll_sort("de", de_1_len, de_1_src, de_1_cck); - icu_coll_sort("de_AT", de_1_len, de_1_src, de_1_cck); - icu_coll_sort("de_DE", de_1_len, de_1_src, de_1_cck); - - return 0; -}; -