2 * Copyright (c) 1995-2007, Index Data
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Index Data nor the names of its contributors
13 * may be used to endorse or promote products derived from this
14 * software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 #include <libxml/parser.h>
34 #include <libxml/tree.h>
36 #include <unicode/utypes.h> /* Basic ICU data types */
37 #include <unicode/uchar.h> /* char names */
39 #include <unicode/ucol.h>
40 #include <unicode/ubrk.h>
41 #include <unicode/utrans.h>
/* Build switch for the optional index/sortkey chain step types: the enum
 * icu_chain_step_type lists them only under #ifdef ICU_CHAIN_SORTKEY.
 * The #define is commented out and the macro is explicitly undefined,
 * so those step types are disabled in this header. */
44 // #define ICU_CHAIN_SORTKEY
45 #undef ICU_CHAIN_SORTKEY
47 /* declared structs and functions */
/* Takes an ICU UErrorCode and returns an int.
 * NOTE(review): the meaning of the return value is not visible in this
 * header - presumably non-zero means "no error"; confirm against the
 * implementation. */
49 int icu_check_status (UErrorCode status);
/* Functions operating on struct icu_buf_utf16. The struct definition is not
 * part of the declarations visible here. */

/* Takes a capacity and returns a pointer to an icu_buf_utf16.
 * Presumably allocates a new buffer of that capacity - TODO confirm. */
58 struct icu_buf_utf16 * icu_buf_utf16_create(size_t capacity);
/* Takes a buffer and returns a buffer pointer. Presumably empties the
 * buffer and returns the same pointer - TODO confirm. */
60 struct icu_buf_utf16 * icu_buf_utf16_clear(struct icu_buf_utf16 * buf16);
/* NOTE(review): this declaration is cut off after its first parameter;
 * the rest of the parameter list is not present here. */
62 struct icu_buf_utf16 * icu_buf_utf16_resize(struct icu_buf_utf16 * buf16,
/* Takes a destination and a source buffer and returns a buffer pointer.
 * Presumably copies the content of src16 into dest16 - TODO confirm. */
65 struct icu_buf_utf16 * icu_buf_utf16_copy(struct icu_buf_utf16 * dest16,
66 struct icu_buf_utf16 * src16);
/* Takes a buffer and returns nothing. Presumably releases the buffer;
 * whether NULL is accepted is not visible here. */
68 void icu_buf_utf16_destroy(struct icu_buf_utf16 * buf16);
/* Functions operating on struct icu_buf_utf8. The struct definition is not
 * part of the declarations visible here. */

/* Takes a capacity and returns a pointer to an icu_buf_utf8.
 * Presumably allocates a new buffer of that capacity - TODO confirm. */
79 struct icu_buf_utf8 * icu_buf_utf8_create(size_t capacity);
/* Takes a buffer and returns a buffer pointer. Presumably empties the
 * buffer and returns the same pointer - TODO confirm. */
81 struct icu_buf_utf8 * icu_buf_utf8_clear(struct icu_buf_utf8 * buf8);
/* NOTE(review): this declaration is cut off after its first parameter;
 * the rest of the parameter list is not present here. */
83 struct icu_buf_utf8 * icu_buf_utf8_resize(struct icu_buf_utf8 * buf8,
/* Takes a buffer and returns nothing. Presumably releases the buffer;
 * whether NULL is accepted is not visible here. */
86 void icu_buf_utf8_destroy(struct icu_buf_utf8 * buf8);
/* Conversion prototypes between the UTF-8 and UTF-16 buffer types.
 * Each returns an ICU UErrorCode. */

/* Destination is a UTF-16 buffer, source a UTF-8 buffer.
 * NOTE(review): this declaration is cut off after src8; the rest of the
 * parameter list is not present here. */
89 UErrorCode icu_utf16_from_utf8(struct icu_buf_utf16 * dest16,
90 struct icu_buf_utf8 * src8,
/* Destination is a UTF-16 buffer, source a const char pointer (presumably
 * a NUL-terminated UTF-8 string, going by the "cstr" name - confirm).
 * NOTE(review): this declaration is cut off after src8cstr. */
93 UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 * dest16,
94 const char * src8cstr,
/* Destination is a UTF-8 buffer, source a UTF-16 buffer; an ICU error code
 * is also passed by pointer through `status`. How the returned UErrorCode
 * relates to *status is not visible here - confirm in the implementation. */
98 UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 * dest8,
99 struct icu_buf_utf16 * src16,
100 UErrorCode * status);
/* Case-mapping API built around struct icu_casemap (definition not visible
 * among these declarations). */

/* Takes a locale string and a one-character action code and returns a
 * pointer to an icu_casemap. The set of valid action characters is not
 * visible here.
 * NOTE(review): this declaration is cut off after `action`. */
108 struct icu_casemap * icu_casemap_create(const char *locale, char action,
/* Takes a casemap object and returns nothing; presumably releases it. */
111 void icu_casemap_destroy(struct icu_casemap * casemap);
/* Takes a casemap object plus destination and source UTF-16 buffers and
 * returns an int whose meaning is not visible here.
 * NOTE(review): this declaration is cut off after src16. */
113 int icu_casemap_casemap(struct icu_casemap * casemap,
114 struct icu_buf_utf16 * dest16,
115 struct icu_buf_utf16 * src16,
/* Variant taking the locale and action code directly instead of a
 * casemap object; returns an int whose meaning is not visible here.
 * NOTE(review): this declaration is cut off after `action`. */
118 int icu_utf16_casemap(struct icu_buf_utf16 * dest16,
119 struct icu_buf_utf16 * src16,
120 const char *locale, char action,
/* Takes an ICU collator, a UTF-8 destination buffer, a UTF-16 source buffer
 * and a pointer to an ICU error code; returns a UErrorCode.
 * Going by the name, it presumably writes the collation sort key of src16
 * into dest8 - confirm against the implementation. How the returned code
 * relates to *status is not visible here. */
123 UErrorCode icu_sortkey8_from_utf16(UCollator *coll,
124 struct icu_buf_utf8 * dest8,
125 struct icu_buf_utf16 * src16,
126 UErrorCode * status);
133 struct icu_buf_utf16 * buf16;
139 keep always invariant
144 0 <= token_id <= token_count
/* Tokenizer API built around struct icu_tokenizer. */

/* Takes a locale string and a one-character action code and returns a
 * pointer to an icu_tokenizer. The set of valid action characters is not
 * visible here.
 * NOTE(review): this declaration is cut off after `action`. */
148 struct icu_tokenizer * icu_tokenizer_create(const char *locale, char action,
/* Takes a tokenizer and returns nothing; presumably releases it. */
151 void icu_tokenizer_destroy(struct icu_tokenizer * tokenizer);
/* Takes a tokenizer, a UTF-16 source buffer and an ICU error code pointer;
 * returns an int whose meaning is not visible here. Presumably binds the
 * tokenizer to src16 as the text to split - TODO confirm, including
 * whether the tokenizer keeps a reference to src16 or copies it. */
153 int icu_tokenizer_attach(struct icu_tokenizer * tokenizer,
154 struct icu_buf_utf16 * src16, UErrorCode *status);
/* Takes a tokenizer and a UTF-16 buffer named tkn16 (presumably receiving
 * the token text) and returns an int32_t whose meaning is not visible here.
 * NOTE(review): this declaration is cut off after tkn16. */
156 int32_t icu_tokenizer_next_token(struct icu_tokenizer * tokenizer,
157 struct icu_buf_utf16 * tkn16,
/* Accessors, each taking a tokenizer and returning an int32_t. Judging by
 * the names they report the id, start, end and length of a token and the
 * token count; units and the exact token referred to are not visible
 * here - confirm against the implementation. */
160 int32_t icu_tokenizer_token_id(struct icu_tokenizer * tokenizer);
161 int32_t icu_tokenizer_token_start(struct icu_tokenizer * tokenizer);
162 int32_t icu_tokenizer_token_end(struct icu_tokenizer * tokenizer);
163 int32_t icu_tokenizer_token_length(struct icu_tokenizer * tokenizer);
164 int32_t icu_tokenizer_token_count(struct icu_tokenizer * tokenizer);
/* State of a normalizer.
 * NOTE(review): the braces of this struct, and possibly further members,
 * are not present here; only the three members below are visible. */
168 struct icu_normalizer
/* UTF-16 buffer; the name suggests it holds the rule text - confirm. */
171 struct icu_buf_utf16 * rules16;
/* Array of 256 ICU UParseError records.
 * NOTE(review): why 256 entries are reserved is not visible here. */
172 UParseError parse_error[256];
/* ICU transliterator handle. */
173 UTransliterator * trans;
/* Takes a rules string and a one-character action code and returns a
 * pointer to an icu_normalizer. The rule syntax and the set of valid action
 * characters are not visible here.
 * NOTE(review): this declaration is cut off after `action`. */
176 struct icu_normalizer * icu_normalizer_create(const char *rules, char action,
/* Takes a normalizer and returns nothing; presumably releases it. */
180 void icu_normalizer_destroy(struct icu_normalizer * normalizer);
/* Takes a normalizer plus destination and source UTF-16 buffers and returns
 * an int whose meaning is not visible here.
 * NOTE(review): this declaration is cut off after src16. */
182 int icu_normalizer_normalize(struct icu_normalizer * normalizer,
183 struct icu_buf_utf16 * dest16,
184 struct icu_buf_utf16 * src16,
/* Kinds of processing step that can appear in a chain.
 * The index and sortkey kinds are listed only when ICU_CHAIN_SORTKEY is
 * defined.
 * NOTE(review): the #endif matching the #ifdef below and the closing brace
 * of the enum are not present here. */
187 enum icu_chain_step_type {
188 ICU_chain_step_type_none,
189 ICU_chain_step_type_display, /* convert to utf8 display format */
190 #ifdef ICU_CHAIN_SORTKEY
191 ICU_chain_step_type_index, /* convert to utf8 index format */
192 ICU_chain_step_type_sortkey, /* convert to utf8 sortkey format */
194 ICU_chain_step_type_casemap, /* apply utf16 case mapping */
195 ICU_chain_step_type_normalize, /* apply utf16 normalization */
196 ICU_chain_step_type_tokenize /* apply utf16 tokenization */
/* One step of a chain: its kind, the object that performs the action, a
 * UTF-16 buffer, and a link to another step.
 * NOTE(review): the braces of this struct, and any wrapper around the three
 * action-object pointers, are not present here. */
201 struct icu_chain_step
203 /* type and action object */
204 enum icu_chain_step_type type;
/* Action objects, one pointer per step kind. Presumably only the one
 * matching `type` is meaningful - TODO confirm. */
206 struct icu_casemap * casemap;
207 struct icu_normalizer * normalizer;
208 struct icu_tokenizer * tokenizer;
210 /* temporary post-action utf16 buffer */
211 struct icu_buf_utf16 * buf16;
/* Link to another step; the name suggests the step that precedes this one
 * in the chain's list - confirm against the implementation. */
212 struct icu_chain_step * previous;
/* Takes the owning chain, the step kind, a rule given as a byte string and
 * a UTF-16 buffer, and returns a pointer to an icu_chain_step. How `rule`
 * is interpreted for each step kind is not visible here.
 * NOTE(review): this declaration is cut off after buf16. */
220 struct icu_chain_step * icu_chain_step_create(struct icu_chain * chain,
221 enum icu_chain_step_type type,
222 const uint8_t * rule,
223 struct icu_buf_utf16 * buf16,
/* Takes a step and returns nothing; presumably releases it. Whether linked
 * steps are released as well is not visible here. */
227 void icu_chain_step_destroy(struct icu_chain_step * step);
237 /* number of tokens returned so far */
240 /* utf8 output buffers */
241 struct icu_buf_utf8 * display8;
242 struct icu_buf_utf8 * norm8;
243 struct icu_buf_utf8 * sort8;
245 /* utf16 source buffer */
246 struct icu_buf_utf16 * src16;
248 /* linked list of chain steps */
249 struct icu_chain_step * steps;
/* Chain API built around struct icu_chain. */

/* Takes a locale given as a byte string and an ICU error code pointer and
 * returns a pointer to an icu_chain.
 * NOTE(review): a parameter line appears to be missing between `locale`
 * and `status` - verify the full signature. */
252 struct icu_chain * icu_chain_create(const uint8_t * locale,
254 UErrorCode * status);
/* Takes a chain and returns nothing; presumably releases it. */
256 void icu_chain_destroy(struct icu_chain * chain);
/* Takes a libxml2 node, a locale given as a byte string and an ICU error
 * code pointer and returns a pointer to an icu_chain; presumably builds the
 * chain from the XML configuration under xml_node - TODO confirm the
 * expected element layout.
 * NOTE(review): a parameter line appears to be missing between `locale`
 * and `status` - verify the full signature. */
258 struct icu_chain * icu_chain_xml_config(xmlNode *xml_node,
259 const uint8_t * locale,
261 UErrorCode * status);
/* Takes a chain, a step kind and a rule given as a byte string and returns
 * a pointer to an icu_chain_step.
 * NOTE(review): this declaration is cut off after `rule`. */
263 struct icu_chain_step * icu_chain_insert_step(struct icu_chain * chain,
264 enum icu_chain_step_type type,
265 const uint8_t * rule,
/* Takes a chain and one of its steps; returns an int whose meaning is not
 * visible here.
 * NOTE(review): this declaration is cut off after `step`. */
268 int icu_chain_step_next_token(struct icu_chain * chain,
269 struct icu_chain_step * step,
/* Takes a chain and a const char pointer (presumably a NUL-terminated
 * UTF-8 string to process - confirm); returns an int whose meaning is not
 * visible here.
 * NOTE(review): this declaration is cut off after src8cstr. */
272 int icu_chain_assign_cstr(struct icu_chain * chain,
273 const char * src8cstr,
/* Takes a chain; returns an int whose meaning is not visible here.
 * NOTE(review): this declaration is cut off after `chain`. */
276 int icu_chain_next_token(struct icu_chain * chain,
/* Takes a chain and returns an int; going by the name, a token count. */
279 int icu_chain_get_token_count(struct icu_chain * chain);
/* Accessors returning const char pointers. Judging by the names they expose
 * the display, normalized and sort forms of a token. The lifetime of the
 * returned strings is not visible here - assume they are owned by the
 * chain until confirmed. */
281 const char * icu_chain_get_display(struct icu_chain * chain);
283 const char * icu_chain_get_norm(struct icu_chain * chain);
285 const char * icu_chain_get_sort(struct icu_chain * chain);
/* Takes a chain and returns a pointer to a const ICU collator. */
287 const UCollator * icu_chain_get_coll(struct icu_chain * chain);
289 #endif /* ICU_I18NL_H */
294 * indent-tabs-mode: nil
296 * vim: shiftwidth=4 tabstop=8 expandtab