-/* $Id: icu_I18N.h,v 1.12 2007-05-14 13:51:24 marc Exp $
- Copyright (c) 2006-2007, Index Data.
+/* This file is part of Pazpar2.
+ Copyright (C) 2006-2008 Index Data
- This file is part of Pazpar2.
+Pazpar2 is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
- Pazpar2 is free software; you can redistribute it and/or modify it under
- the terms of the GNU General Public License as published by the Free
- Software Foundation; either version 2, or (at your option) any later
- version.
+Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
- Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
- WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- for more details.
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- You should have received a copy of the GNU General Public License
- along with Pazpar2; see the file LICENSE. If not, write to the
- Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
- 02111-1307, USA.
*/
#ifndef ICU_I18NL_H
#define ICU_I18NL_H
-#ifdef HAVE_ICU
-
#include <yaz/nmem.h>
+#include <libxml/parser.h>
+#include <libxml/tree.h>
#include <unicode/utypes.h> /* Basic ICU data types */
#include <unicode/uchar.h> /* char names */
struct icu_buf_utf16 * src16,
UErrorCode * status);
+struct icu_casemap // state handed to the icu_casemap_* functions: a locale name plus an action code
+{
+ char locale[16]; // locale name stored inline in a fixed 16-byte buffer; presumably NUL-terminated (so at most 15 characters) — TODO confirm in the implementation
+ char action; // single-character action selector; the set of valid values is not visible in this header
+};
+
+struct icu_casemap * icu_casemap_create(const char *locale, char action,
+ UErrorCode *status); // presumably returns a new casemap to be released with icu_casemap_destroy; failure convention (NULL vs. status) not visible here — confirm
+
+void icu_casemap_destroy(struct icu_casemap * casemap); // counterpart to icu_casemap_create; NOTE(review): whether NULL is accepted is not visible here
+
+int icu_casemap_casemap(struct icu_casemap * casemap,
+ struct icu_buf_utf16 * dest16,
+ struct icu_buf_utf16 * src16,
+ UErrorCode *status); // presumably case-maps src16 into dest16 using casemap's locale/action; meaning of the int result is not shown in this header
+
int icu_utf16_casemap(struct icu_buf_utf16 * dest16,
struct icu_buf_utf16 * src16,
const char *locale, char action,
}
#endif
+
enum icu_chain_step_type {
ICU_chain_step_type_none, // no step type
ICU_chain_step_type_display, // convert to utf8 display format
- ICU_chain_step_type_norm, // convert to utf8 norm format
- ICU_chain_step_type_sort, // convert to utf8 sort format
- ICU_chain_step_type_charmap, // apply utf16 charmap
+ ICU_chain_step_type_index, // convert to utf8 index format
+ ICU_chain_step_type_sortkey, // convert to utf8 sortkey format
+ ICU_chain_step_type_casemap, // apply utf16 casemap
ICU_chain_step_type_normalize, // apply utf16 normalization
ICU_chain_step_type_tokenize // apply utf16 tokenization
};
// type and action object
enum icu_chain_step_type type;
union {
+ struct icu_casemap * casemap;
struct icu_normalizer * normalizer;
struct icu_tokenizer * tokenizer;
} u;
// temporary post-action utf16 buffer
struct icu_buf_utf16 * buf16;
- struct icu_chain_step * next;
+ struct icu_chain_step * previous;
+ int more_tokens;
+ int need_new_token;
};
+struct icu_chain; // forward declaration: the struct is defined further down, but icu_chain_step_create already takes a pointer to it
+
+struct icu_chain_step * icu_chain_step_create(struct icu_chain * chain,
+ enum icu_chain_step_type type,
+ const uint8_t * rule,
+ struct icu_buf_utf16 * buf16,
+ UErrorCode *status); // NOTE(review): format of rule and ownership of buf16 are not visible in this header — confirm in the implementation
+
+
+void icu_chain_step_destroy(struct icu_chain_step * step); // counterpart to icu_chain_step_create; presumably releases the step — confirm whether linked steps are released too
+
+
struct icu_chain
{
uint8_t identifier[128];
void icu_chain_destroy(struct icu_chain * chain); // releases a chain; NOTE(review): whether its steps are released as well is not visible here
-struct icu_chain_step * icu_chain_append_step(struct icu_chain * chain,
+struct icu_chain * icu_chain_xml_config(xmlNode *xml_node,
+ UErrorCode * status); // presumably builds a chain from a libxml2 node; the expected element/attribute layout is not visible in this header
+
+
+struct icu_chain_step * icu_chain_insert_step(struct icu_chain * chain,
enum icu_chain_step_type type,
- const uint8_t * rule);
+ const uint8_t * rule,
+ UErrorCode *status); // status is an ICU UErrorCode in/out parameter; NOTE(review): where in the chain the step is inserted is not visible here
+
+
+int icu_chain_step_next_token(struct icu_chain * chain,
+ struct icu_chain_step * step,
+ UErrorCode *status); // NOTE(review): return-value convention is not visible in this header
+
+int icu_chain_assign_cstr(struct icu_chain * chain,
+ const char * src8cstr,
+ UErrorCode *status); // src8cstr: presumably a NUL-terminated 8-bit (UTF-8) C string, judging by the name — confirm
+
+int icu_chain_next_token(struct icu_chain * chain,
+ UErrorCode *status); // presumably advances the chain to its next token; meaning of the int result is not shown here
+
+int icu_chain_get_token_count(struct icu_chain * chain);
+
+const char * icu_chain_get_display(struct icu_chain * chain); // lifetime/ownership of the returned string is not visible here
+
+const char * icu_chain_get_norm(struct icu_chain * chain); // NOTE(review): named "norm" although the step-type enum uses "index" — confirm which result this returns
+
+const char * icu_chain_get_sort(struct icu_chain * chain); // NOTE(review): named "sort" although the step-type enum uses "sortkey" — confirm which result this returns
+
-void icu_chain_step_destroy(struct icu_chain_step * step);
-#endif // HAVE_ICU
#endif // ICU_I18NL_H