1 /* $Id: icu_I18N.c,v 1.3 2007-05-01 13:16:09 marc Exp $
2 Copyright (c) 2006-2007, Index Data.
4 This file is part of Pazpar2.
6 Pazpar2 is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
11 Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with Pazpar2; see the file LICENSE. If not, write to the
18 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
28 #include <yaz/timing.h>
39 #include <unicode/ustring.h> /* some more string fcns*/
40 #include <unicode/uchar.h> /* char names */
43 //#include <unicode/ustdio.h>
44 //#include <unicode/utypes.h> /* Basic ICU data types */
45 #include <unicode/ucol.h>
46 //#include <unicode/ucnv.h> /* C Converter API */
47 //#include <unicode/uloc.h>
48 //#include <unicode/ubrk.h>
49 /* #include <unicode/unistr.h> */
52 // forward declarations for helper functions
/* Reports an ICU status code; its definition in this file formats an
   "ICU Error: <code> <name>" message using u_errorName(). */
54 int icu_check_status (UErrorCode status);
/* UTF-8 -> UTF-16 conversion for a NUL-terminated input; the definition
   measures the input with strlen() and delegates to icu_utf16_from_utf8n().
   (Remaining parameters are not visible in this excerpt.) */
56 UChar* icu_utf16_from_utf8(UChar *utf16,
/* UTF-8 -> UTF-16 conversion with an explicit input length; the definition
   is built on u_strFromUTF8(). (Remaining parameters not visible here.) */
61 UChar* icu_utf16_from_utf8n(UChar *utf16,
/* UTF-16 -> UTF-8 conversion; the definition is built on u_strToUTF8().
   (Remaining parameters not visible here.) */
68 char* icu_utf16_to_utf8(char *utf8,
/* Case mapping of UTF-16 text from src16 into dest16. The definition
   contains lower/upper/title/fold calls; presumably `action` selects
   which one is applied -- the dispatch is not visible in this excerpt. */
75 int32_t icu_utf16_casemap(UChar *dest16, int32_t dest16_cap,
76 const UChar *src16, int32_t src16_len,
77 const char *locale, char action);
80 // source code of all functions
/* Report an ICU status code.
 *
 * The visible part of the body formats the message
 * "ICU Error: <numeric code> <symbolic name>", obtaining the symbolic
 * name from u_errorName(status). The call that emits the message, any
 * condition guarding it, and the return statement are not visible in
 * this excerpt.
 *
 * NOTE(review): every caller in this file tests the result with
 * `U_ZERO_ERROR != icu_check_status(status)`, so this presumably returns
 * the status value unchanged -- confirm against the full body.
 */
82 int icu_check_status (UErrorCode status)
86 "ICU Error: %d %s\n", status, u_errorName(status));
/* Convert a NUL-terminated UTF-8 string to UTF-16.
 *
 * Thin wrapper: takes the byte length of `utf8` with strlen() and
 * forwards utf16, utf16_cap and utf16_len to icu_utf16_from_utf8n(),
 * returning whatever that function returns.
 *
 * `utf8` must not be NULL, since it is handed straight to strlen().
 * (The rest of the parameter list is not visible in this excerpt.)
 */
91 UChar* icu_utf16_from_utf8(UChar *utf16,
96 size_t utf8_len = strlen(utf8);
97 return icu_utf16_from_utf8n(utf16, utf16_cap, utf16_len,
/* Convert `utf8_len` bytes of UTF-8 at `utf8` to UTF-16 in `utf16`.
 *
 * Delegates to ICU's u_strFromUTF8(), passing utf16_cap as the
 * destination capacity and utf16_len as the out-parameter for the
 * resulting length. The ICU status is then passed through
 * icu_check_status(); what happens on failure and what is returned are
 * not visible in this excerpt.
 */
102 UChar* icu_utf16_from_utf8n(UChar *utf16,
108 UErrorCode status = U_ZERO_ERROR;
// utf8_len is narrowed to int32_t for ICU here.
// NOTE(review): an input longer than INT32_MAX bytes would not survive
// this cast -- confirm callers never pass such lengths.
109 u_strFromUTF8(utf16, utf16_cap, utf16_len, utf8, (int32_t) utf8_len,
111 if (U_ZERO_ERROR != icu_check_status(status))
/* Convert `utf16_len` UTF-16 code units at `utf16` to UTF-8 in `utf8`.
 *
 * Delegates to ICU's u_strToUTF8(), with utf8_cap (cast to int32_t) as
 * the destination capacity and utf8_len as the out-parameter for the
 * resulting length. The ICU status is then passed through
 * icu_check_status(); the failure branch and the return statement are
 * not visible in this excerpt.
 */
118 char* icu_utf16_to_utf8(char *utf8,
124 UErrorCode status = U_ZERO_ERROR;
// NOTE(review): utf8_len is reinterpreted as int32_t* here. Its declared
// type is not visible in this excerpt, but icu_casemap() passes it a
// `(size_t *)`. If it really is size_t*, ICU stores only an int32_t
// through it, which does not fill a wider size_t -- confirm, and consider
// a local int32_t that is copied out afterwards.
125 u_strToUTF8(utf8, (int32_t) utf8_cap, (int32_t *)utf8_len,
126 utf16, utf16_len, &status);
127 if (U_ZERO_ERROR != icu_check_status(status))
/* Apply a case mapping to UTF-16 text.
 *
 * dest16 / dest16_cap : destination buffer and its capacity, handed
 *                       unchanged to the ICU mapping functions
 * src16 / src16_len   : source text and its length
 * locale              : locale name (the lines that consume it are not
 *                       visible in this excerpt)
 * action              : presumably selects which of the four mappings
 *                       below runs -- the dispatch itself is not visible
 *
 * The length reported by the ICU call is stored in dest16_len, and the
 * ICU status is passed through icu_check_status(). The failure branch
 * and the return statement are not visible in this excerpt.
 */
134 int32_t icu_utf16_casemap(UChar *dest16, int32_t dest16_cap,
135 const UChar *src16, int32_t src16_len,
136 const char *locale, char action)
138 UErrorCode status = U_ZERO_ERROR;
139 int32_t dest16_len = 0;
// lower-casing
143 dest16_len = u_strToLower(dest16, dest16_cap, src16, src16_len,
// upper-casing
147 dest16_len = u_strToUpper(dest16, dest16_cap, src16, src16_len,
// title-casing
151 dest16_len = u_strToTitle(dest16, dest16_cap, src16, src16_len,
// case folding with ICU's default folding option
155 dest16_len = u_strFoldCase(dest16, dest16_cap, src16, src16_len,
156 U_FOLD_CASE_DEFAULT, &status);
164 if (U_ZERO_ERROR != icu_check_status(status))
/* Case-map a UTF-8 string and return a copy allocated from `nmem`.
 *
 * Visible pipeline, all using the caller-supplied scratch buffer `buf`:
 *   1. UTF-8 -> UTF-16 into buf      (icu_utf16_from_utf8n)
 *   2. case mapping, buf -> buf      (icu_utf16_casemap)
 *   3. UTF-16 -> UTF-8, buf -> buf   (icu_utf16_to_utf8)
 *   4. *dest8_len = buf_len, and the text is duplicated with nmem_strdup()
 *
 * nmem      : allocator used for the returned copy
 * buf       : scratch buffer; must be non-NULL (checked below)
 * buf_cap   : capacity of buf; must be > 0 (checked below)
 * dest8_len : receives buf_len after the final conversion
 * src8      : NUL-terminated input; an empty string trips the guard below
 * locale, action : their use falls on lines not visible in this excerpt;
 *                  presumably forwarded to icu_utf16_casemap()
 *
 * The declarations of buf_len and dest8, the body of the guard, the error
 * handling between steps and the return statement are not visible here.
 */
171 char * icu_casemap(NMEM nmem, char *buf, size_t buf_cap,
172 size_t *dest8_len, const char *src8,
173 const char *locale, char action)
// NOTE(review): strlen() runs before the guard below, and the guard does
// not test src8 itself, so a NULL src8 is not handled.
175 size_t src8_len = strlen(src8);
182 if (!buf || !(buf_cap > 0) || !src8_len)
185 // converting buf to utf16
// NOTE(review): buf is a char buffer reinterpreted as UChar*, yet buf_cap
// is passed on unchanged as the capacity. If buf_cap counts bytes, the
// capacity in UChar units is overstated -- confirm.
186 buf = (char *)icu_utf16_from_utf8n((UChar *) buf,
187 (int32_t) buf_cap, &buf_len,
// source and destination are the same buffer in this step
191 buf_len = (size_t) icu_utf16_casemap((UChar *)buf, (int32_t) buf_cap,
192 (const UChar *)buf, (int32_t) buf_len,
195 // converting buf to utf8
// NOTE(review): source and destination are again the same buffer; verify
// that u_strToUTF8() permits overlapping buffers.
196 buf = icu_utf16_to_utf8(buf, buf_cap, (size_t *) &buf_len,
197 (const UChar *) buf, (int32_t) buf_len);
200 // copying out to nmem
204 *dest8_len = buf_len;
206 dest8 = nmem_strdup(nmem, buf);
/* Allocate a `struct icu_termmap` from the given NMEM handle.
 *
 * The object is obtained with nmem_malloc(), sized from the pointee
 * (`sizeof(*itmp)`). Field initialisation and the return statement are
 * not visible in this excerpt.
 */
211 struct icu_termmap * icu_termmap_create(NMEM nmem)
213 struct icu_termmap *itmp = nmem_malloc(nmem, sizeof(*itmp));
/* Compare two term maps by their sort keys.
 *
 * The signature matches the comparator type expected by qsort(). Each
 * argument is dereferenced as a pointer to `struct icu_termmap *`, i.e.
 * the compared elements are themselves pointers to term maps.
 *
 * Two comparison strategies are visible in this excerpt:
 *   - strncmp() over the shorter of the two sort_len values, with ties
 *     broken on which key is shorter or longer;
 *   - a plain strcmp() of the two sort_key strings.
 * NOTE(review): the lines between them are missing here, so it cannot be
 * determined which strategy is active (one may be disabled) -- confirm
 * against the full file. The values assigned in the tie-break branches
 * and the return statement are also not visible.
 */
221 int icu_termmap_cmp(const void *vp1, const void *vp2)
223 struct icu_termmap *itmp1 = *(struct icu_termmap **) vp1;
224 struct icu_termmap *itmp2 = *(struct icu_termmap **) vp2;
228 size_t len = itmp1->sort_len;
229 // minimum sortkey length
230 if (itmp2->sort_len < len)
231 len = itmp2->sort_len;
// compare only the prefix both keys have in common
233 cmp = strncmp(itmp1->sort_key, itmp2->sort_key, len);
// equal prefix: first key is the shorter one
235 if (cmp == 0 && (itmp1->sort_len < itmp2->sort_len))
// equal prefix: first key is the longer one
238 if (cmp == 0 && (itmp1->sort_len > itmp2->sort_len))
// whole-string comparison; relies on sort_key being NUL-terminated
241 cmp = strcmp(itmp1->sort_key, itmp2->sort_key);
/* Build a collation sort key for a UTF-8 string and return a copy
 * allocated from `nmem`.
 *
 * Visible pipeline, all using the caller-supplied scratch buffer `buf`:
 *   1. UTF-8 -> UTF-16 into buf          (icu_utf16_from_utf8n)
 *   2. open a collator for `locale`      (ucol_open)
 *   3. write the sort key back into buf  (ucol_getSortKey)
 *   4. *dest8_len = buf_len, and buf is duplicated with nmem_strdup()
 *
 * nmem      : allocator used for the returned copy
 * buf       : scratch buffer; must be non-NULL (checked below)
 * buf_cap   : capacity of buf; must be > 0 (checked below)
 * dest8_len : receives buf_len
 * src8      : NUL-terminated input; an empty string trips the guard below
 *
 * The rest of the parameter list (including `locale`, used below), the
 * declarations of buf_len and dest8, the guard body, the error branches
 * and the return statement are not visible in this excerpt.
 */
248 char * icu_sortmap(NMEM nmem, char *buf, size_t buf_cap,
249 size_t *dest8_len, const char *src8,
// NOTE(review): strlen() runs before the guard below, and the guard does
// not test src8 itself, so a NULL src8 is not handled.
252 size_t src8_len = strlen(src8);
259 if (!buf || !(buf_cap > 0) || !src8_len)
262 // converting buf to utf16
// NOTE(review): buf_cap is passed on unchanged as the UChar capacity of a
// char buffer; if it counts bytes the capacity is overstated -- confirm.
263 buf = (char *)icu_utf16_from_utf8n((UChar *) buf,
264 (int32_t) buf_cap, &buf_len,
268 //buf_len = (size_t) icu_utf16_casemap((UChar *)buf, (int32_t) buf_cap,
269 // (const UChar *)buf, (int32_t) buf_len,
274 UErrorCode status = U_ZERO_ERROR;
// NOTE(review): no matching ucol_close() appears in the visible lines;
// confirm the collator is released on every path.
276 UCollator * coll = ucol_open (locale, &status);
277 if (U_ZERO_ERROR != icu_check_status(status))
// The sort key is written over the UTF-16 text it is computed from, and
// the return value of ucol_getSortKey() is not captured in this visible
// call.
// NOTE(review): verify that overlapping source/result buffers are
// permitted and that buf_len reflects the key length before it is
// published below.
280 ucol_getSortKey(coll, (const UChar *) buf, (int32_t) buf_len,
281 (uint8_t *) buf, (int32_t) buf_cap);
287 // copying out to nmem
291 *dest8_len = buf_len;
293 dest8 = nmem_strdup(nmem, buf);
308 * indent-tabs-mode: nil
310 * vim: shiftwidth=4 tabstop=8 expandtab