-/* $Id: icu_I18N.h,v 1.6 2007-05-07 12:18:34 marc Exp $
+/* $Id: icu_I18N.h,v 1.11 2007-05-11 10:38:42 marc Exp $
Copyright (c) 2006-2007, Index Data.
-This file is part of Pazpar2.
+ This file is part of Pazpar2.
-Pazpar2 is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 2, or (at your option) any later
-version.
+ Pazpar2 is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2, or (at your option) any later
+ version.
-Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+ Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
-You should have received a copy of the GNU General Public License
-along with Pazpar2; see the file LICENSE. If not, write to the
-Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
-02111-1307, USA.
- */
+ You should have received a copy of the GNU General Public License
+ along with Pazpar2; see the file LICENSE. If not, write to the
+ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+ 02111-1307, USA.
+*/
#ifndef ICU_I18NL_H
#define ICU_I18NL_H
//#include <unicode/ucnv.h> /* C Converter API */
//#include <unicode/ustring.h> /* some more string fcns*/
//#include <unicode/uloc.h>
-//#include <unicode/ubrk.h>
+#include <unicode/ubrk.h>
//#include <unicode/unistr.h>
+#include <unicode/utrans.h>
+
+
+
+// forward declarations
+//struct UBreakIterator;
+
+
+
+
+// declared structs and functions
int icu_check_status (UErrorCode status); // takes an ICU UErrorCode by value; the meaning of the int result is not visible in this header -- TODO confirm in the implementation
struct icu_buf_utf16 * icu_buf_utf16_create(size_t capacity); // returns a pointer to a UTF-16 buffer; presumably newly allocated with room for `capacity` units -- TODO confirm
struct icu_buf_utf16 * icu_buf_utf16_resize(struct icu_buf_utf16 * buf16, // buffer to resize
size_t capacity); // requested capacity; NOTE(review): whether the returned pointer can differ from buf16 is not visible here -- confirm
+struct icu_buf_utf16 * icu_buf_utf16_copy(struct icu_buf_utf16 * dest16, // destination buffer (by name)
+ struct icu_buf_utf16 * src16); // source buffer (by name); NOTE(review): not const-qualified -- confirm whether it is modified
void icu_buf_utf16_destroy(struct icu_buf_utf16 * buf16); // presumably releases a buffer obtained from icu_buf_utf16_create -- TODO confirm
struct icu_buf_utf16 * src16,
UErrorCode * status);
+int icu_utf16_casemap(struct icu_buf_utf16 * dest16, // destination UTF-16 buffer; presumably receives the case-mapped text -- TODO confirm
+ struct icu_buf_utf16 * src16, // source UTF-16 buffer (by name)
+ const char *locale, char action, // locale: C string, not modified through this pointer; action: single-char selector whose accepted values are not visible here -- TODO confirm
+ UErrorCode *status); // ICU error code passed by pointer; the meaning of the int return is not visible here
UErrorCode icu_sortkey8_from_utf16(UCollator *coll, // NOTE(review): UCollator is declared in <unicode/ucol.h>; no such include is visible in this view -- confirm it is included elsewhere
struct icu_buf_utf8 * dest8, // destination buffer (struct icu_buf_utf8; its definition is not visible here)
struct icu_buf_utf16 * src16, // source UTF-16 buffer (by name)
UErrorCode * status); // NOTE(review): a UErrorCode is both returned and passed by pointer; how the two relate is not visible here -- confirm
+struct icu_tokenizer // tokenizer state: locale/action settings, an ICU break iterator, a UTF-16 buffer pointer and token bookkeeping
+{
+ char locale[16]; // fixed 16-byte storage for a locale identifier; NOTE(review): confirm longer locale strings are rejected or truncated with NUL termination
+ char action; // single-char selector; accepted values are not visible in this header -- TODO confirm
+ UBreakIterator* bi; // ICU break iterator handle (type from <unicode/ubrk.h>)
+ struct icu_buf_utf16 * buf16; // UTF-16 buffer whose utf16_len bounds the token offsets (see invariants below)
+ int32_t token_count; // upper bound for token_id (see invariants below)
+ int32_t token_id; // kept within 0..token_count
+ int32_t token_start; // token start offset; never negative, never past token_end
+ int32_t token_end; // token end offset; never past buf16->utf16_len
+ // Invariants that must hold at all times:
+ //   0 <= token_start
+ //     <= token_end
+ //     <= buf16->utf16_len
+ // and:
+ //   0 <= token_id <= token_count
+};
+struct icu_tokenizer * icu_tokenizer_create(const char *locale, char action, // same kinds of values as the locale/action fields of struct icu_tokenizer; presumably stored there -- TODO confirm
+ UErrorCode *status); // ICU error code passed by pointer
+void icu_tokenizer_destroy(struct icu_tokenizer * tokenizer); // presumably releases a tokenizer from icu_tokenizer_create -- TODO confirm; NULL handling is not visible here
+int icu_tokenizer_attach(struct icu_tokenizer * tokenizer, // tokenizer to operate on
+ struct icu_buf_utf16 * src16, UErrorCode *status); // src16: UTF-16 text buffer; NOTE(review): whether src16 is copied or merely referenced (lifetime) is not visible here -- confirm
+int32_t icu_tokenizer_next_token(struct icu_tokenizer * tokenizer, // tokenizer to advance (by name); not const, so its state may change
+ struct icu_buf_utf16 * tkn16, // presumably receives the text of the next token -- TODO confirm
+ UErrorCode *status); // ICU error code passed by pointer; NOTE(review): the meaning of the int32_t result (length, id, or end-of-input flag) is not visible here -- confirm
+int32_t icu_tokenizer_token_id(struct icu_tokenizer * tokenizer); // presumably reports tokenizer->token_id -- TODO confirm
+int32_t icu_tokenizer_token_start(struct icu_tokenizer * tokenizer); // presumably reports tokenizer->token_start -- TODO confirm
+int32_t icu_tokenizer_token_end(struct icu_tokenizer * tokenizer); // presumably reports tokenizer->token_end -- TODO confirm
+int32_t icu_tokenizer_token_length(struct icu_tokenizer * tokenizer); // presumably token_end - token_start -- TODO confirm
+int32_t icu_tokenizer_token_count(struct icu_tokenizer * tokenizer); // presumably reports tokenizer->token_count -- TODO confirm
-// CRAP to Follow here ...
-
-#if 0
-struct icu_termmap
+struct icu_normalizer // normalizer state: an action selector, a UTF-16 rules buffer, parse-error storage and an ICU transliterator handle
{
- char * sort_key; // standard C string '\0' terminated
- char * norm_term; // standard C utf-8 string
- char * disp_term; // standard C utf-8 string
+ char action; // single-char selector; accepted values are not visible in this header -- TODO confirm
+ struct icu_buf_utf16 * rules16; // UTF-16 buffer; by name holds the rules text -- TODO confirm
+ UParseError parse_error[256]; // NOTE(review): ICU's utrans_openU() takes a pointer to a single UParseError; an array of 256 looks larger than needed -- confirm against the implementation
+ UTransliterator * trans; // ICU transliterator handle (type from <unicode/utrans.h>)
};
-struct icu_termmap * icu_termmap_create(NMEM nmem);
+struct icu_normalizer * icu_normalizer_create(const char *rules, char action, // rules: C string, presumably transliteration rules (cf. rules16/trans in struct icu_normalizer) -- TODO confirm; action: accepted values are not visible here
+ UErrorCode *status); // ICU error code passed by pointer
+
-int icu_termmap_cmp(const void *vp1, const void *vp2);
+void icu_normalizer_destroy(struct icu_normalizer * normalizer); // presumably releases a normalizer from icu_normalizer_create -- TODO confirm; NULL handling is not visible here
-char * icu_casemap(NMEM nmem, char *buf, size_t buf_cap,
- size_t *dest8_len, const char *src8,
- const char *locale, char action);
+int icu_normalizer_normalize(struct icu_normalizer * normalizer, // normalizer to apply (by name)
+ struct icu_buf_utf16 * dest16, // destination UTF-16 buffer; presumably receives the normalized text -- TODO confirm
+ struct icu_buf_utf16 * src16, // source UTF-16 buffer (by name); NOTE(review): not const-qualified -- confirm whether it is modified
+ UErrorCode *status); // ICU error code passed by pointer; the meaning of the int return is not visible here
-char * icu_sortmap(NMEM nmem, char *buf, size_t buf_cap,
- size_t *dest8_len, const char *src8,
- const char *locale);
-#endif // 0
#endif // HAVE_ICU