X-Git-Url: http://lists.indexdata.dk/cgi-bin?a=blobdiff_plain;f=src%2Ficu_I18N.h;h=ff6bf3102ce3c8f90b76bca520e489ff22f402eb;hb=6f08d24c048052306511961bb9710694f07b5528;hp=2461801d8ea679bab178ee2188334d70cb50ebf6;hpb=ae2621373444129f49c4063980554c5aed6cb57f;p=pazpar2-moved-to-github.git diff --git a/src/icu_I18N.h b/src/icu_I18N.h index 2461801..ff6bf31 100644 --- a/src/icu_I18N.h +++ b/src/icu_I18N.h @@ -1,4 +1,4 @@ -/* $Id: icu_I18N.h,v 1.9 2007-05-11 08:27:29 marc Exp $ +/* $Id: icu_I18N.h,v 1.13 2007-05-15 15:11:42 marc Exp $ Copyright (c) 2006-2007, Index Data. This file is part of Pazpar2. @@ -41,15 +41,8 @@ -// forward declarations -//struct UBreakIterator; - - - - // declared structs and functions - int icu_check_status (UErrorCode status); struct icu_buf_utf16 @@ -62,6 +55,8 @@ struct icu_buf_utf16 struct icu_buf_utf16 * icu_buf_utf16_create(size_t capacity); struct icu_buf_utf16 * icu_buf_utf16_resize(struct icu_buf_utf16 * buf16, size_t capacity); +struct icu_buf_utf16 * icu_buf_utf16_copy(struct icu_buf_utf16 * dest16, + struct icu_buf_utf16 * src16); void icu_buf_utf16_destroy(struct icu_buf_utf16 * buf16); @@ -140,6 +135,107 @@ int32_t icu_tokenizer_token_count(struct icu_tokenizer * tokenizer); +struct icu_normalizer +{ + char action; + struct icu_buf_utf16 * rules16; + UParseError parse_error[256]; + UTransliterator * trans; +}; + +struct icu_normalizer * icu_normalizer_create(const char *rules, char action, + UErrorCode *status); + + +void icu_normalizer_destroy(struct icu_normalizer * normalizer); + +int icu_normalizer_normalize(struct icu_normalizer * normalizer, + struct icu_buf_utf16 * dest16, + struct icu_buf_utf16 * src16, + UErrorCode *status); + + +#if 0 +struct icu_token +{ + int32_t token_id; + uint8_t * display8; + uint8_t * norm8; + uint8_t * sort8; +} +#endif + +enum icu_chain_step_type { + ICU_chain_step_type_none, // + ICU_chain_step_type_display, // convert to utf8 display format + ICU_chain_step_type_norm, // convert to utf8 norm 
format + ICU_chain_step_type_sort, // convert to utf8 sort format + ICU_chain_step_type_charmap, // apply utf16 charmap + ICU_chain_step_type_normalize, // apply utf16 normalization + ICU_chain_step_type_tokenize // apply utf16 tokenization +}; + + + +struct icu_chain_step +{ + // type and action object + enum icu_chain_step_type type; + union { + struct icu_normalizer * normalizer; + struct icu_tokenizer * tokenizer; + } u; + // temporary post-action utf16 buffer + struct icu_buf_utf16 * src16; + struct icu_chain_step * previous; + int end_of_tokens; +}; + + +struct icu_chain; + +struct icu_chain_step * icu_chain_step_create(struct icu_chain * chain, + enum icu_chain_step_type type, + const uint8_t * rule, + struct icu_buf_utf16 * src16, + UErrorCode *status); + + +void icu_chain_step_destroy(struct icu_chain_step * step); + + +struct icu_chain +{ + uint8_t identifier[128]; + uint8_t locale[16]; + + // number of tokens returned so far + int32_t token_count; + + // utf8 output buffers + struct icu_buf_utf8 * display8; + struct icu_buf_utf8 * norm8; + struct icu_buf_utf8 * sort8; + + // utf16 source buffer + struct icu_buf_utf16 * src16; + + // linked list of chain steps + struct icu_chain_step * steps; +}; + +struct icu_chain * icu_chain_create(const uint8_t * identifier, + const uint8_t * locale); + +void icu_chain_destroy(struct icu_chain * chain); + +struct icu_chain_step * icu_chain_insert_step(struct icu_chain * chain, + enum icu_chain_step_type type, + const uint8_t * rule, + UErrorCode *status); + + + #endif // HAVE_ICU #endif // ICU_I18NL_H