struct icu_casemap * icu_casemap_create(char action, UErrorCode *status);
+struct icu_casemap *icu_casemap_clone(struct icu_casemap *old);
+
void icu_casemap_destroy(struct icu_casemap * casemap);
int icu_casemap_casemap(struct icu_casemap * casemap,
struct icu_tokenizer * icu_tokenizer_create(const char *locale, char action,
UErrorCode *status);
+struct icu_tokenizer *icu_tokenizer_clone(struct icu_tokenizer *old);
void icu_tokenizer_destroy(struct icu_tokenizer * tokenizer);
int icu_tokenizer_attach(struct icu_tokenizer * tokenizer,
struct icu_transform * icu_transform_create(const char *id, char action,
const char *rules,
UErrorCode *status);
-
+struct icu_transform *icu_transform_clone(struct icu_transform *old);
void icu_transform_destroy(struct icu_transform * transform);
int icu_transform_trans(struct icu_transform * transform,
return casemap;
}
+struct icu_casemap *icu_casemap_clone(struct icu_casemap *old)
+{
+ struct icu_casemap * casemap
+ = (struct icu_casemap *) xmalloc(sizeof(struct icu_casemap));
+ casemap->action = old->action;
+ return casemap;
+}
+
void icu_casemap_destroy(struct icu_casemap * casemap)
{
xfree(casemap);
struct icu_buf_utf8 * norm8;
/* linked list of chain steps */
- struct icu_chain_step * steps;
+ struct icu_chain_step * csteps;
};
int icu_check_status(UErrorCode status)
{
struct icu_chain_step * step = 0;
- if(!chain || !type || !rule)
+ if (!chain || !type || !rule)
return 0;
step = (struct icu_chain_step *) xmalloc(sizeof(struct icu_chain_step));
xfree(step);
}
+struct icu_chain_step *icu_chain_step_clone(struct icu_chain_step *old)
+{
+ struct icu_chain_step *step = 0;
+ struct icu_chain_step **sp = &step;
+ while (old)
+ {
+ *sp = (struct icu_chain_step *) xmalloc(sizeof(**sp));
+ (*sp)->type = old->type;
+
+ switch ((*sp)->type)
+ {
+ case ICU_chain_step_type_display:
+ break;
+ case ICU_chain_step_type_casemap:
+ (*sp)->u.casemap = icu_casemap_clone(old->u.casemap);
+ break;
+ case ICU_chain_step_type_transform:
+ case ICU_chain_step_type_transliterate:
+ (*sp)->u.transform = icu_transform_clone(old->u.transform);
+ break;
+ case ICU_chain_step_type_tokenize:
+ (*sp)->u.tokenizer = icu_tokenizer_clone(old->u.tokenizer);
+ break;
+ case ICU_chain_step_type_none:
+ break;
+ }
+ old = old->previous;
+ sp = &(*sp)->previous;
+ }
+ *sp = 0;
+ return step;
+}
+
struct icu_chain *icu_chain_create(const char *locale, int sort,
UErrorCode * status)
{
return 0;
chain->norm8 = icu_buf_utf8_create(0);
- chain->steps = 0;
+ chain->csteps = 0;
return chain;
}
icu_buf_utf8_destroy(chain->norm8);
if (chain->iter)
icu_iter_destroy(chain->iter);
- icu_chain_step_destroy(chain->steps);
+ icu_chain_step_destroy(chain->csteps);
xfree(chain->locale);
xfree(chain);
}
return chain;
}
+
static struct icu_chain_step *icu_chain_insert_step(
struct icu_chain * chain, enum icu_chain_step_type type,
const uint8_t * rule, UErrorCode *status)
step = icu_chain_step_create(chain, type, rule,
status);
- step->previous = chain->steps;
- chain->steps = step;
+ step->previous = chain->csteps;
+ chain->csteps = step;
return step;
}
struct icu_buf_utf8 *sort8;
struct icu_buf_utf16 *input;
int token_count;
+ struct icu_chain_step *steps;
};
void icu_utf16_print(struct icu_buf_utf16 *src16)
iter->sort8 = icu_buf_utf8_create(0);
iter->token_count = 0;
iter->last = 0; /* no last returned string (yet) */
+#if 0
+ iter->steps = icu_chain_step_clone(chain->csteps);
+#else
+ iter->steps = 0;
+#endif
/* fill and assign input string.. It will be 0 after
first iteration */
iter->input = icu_buf_utf16_create(0);
icu_utf16_from_utf8_cstr(iter->input, src8cstr, &iter->status);
return iter;
-
}
}
icu_buf_utf8_destroy(iter->sort8);
if (iter->input)
icu_buf_utf16_destroy(iter->input);
+ icu_chain_step_destroy(iter->steps);
xfree(iter);
}
}
else
{
/* on first call, iter->input is the input string. Thereafter: 0. */
- iter->last = icu_iter_invoke(iter, iter->chain->steps, iter->input);
+ iter->last = icu_iter_invoke(iter, iter->steps ?
+ iter->steps : iter->chain->csteps,
+ iter->input);
iter->input = 0;
if (!iter->last)
#include <yaz/log.h>
+#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
*/
};
-struct icu_tokenizer *icu_tokenizer_create(const char *locale, char action,
- UErrorCode *status)
+static void icu_tokenizer_reset(struct icu_tokenizer *tokenizer,
+ char action)
{
- struct icu_tokenizer * tokenizer
- = (struct icu_tokenizer *) xmalloc(sizeof(struct icu_tokenizer));
-
tokenizer->action = action;
tokenizer->bi = 0;
tokenizer->buf16 = icu_buf_utf16_create(0);
tokenizer->token_id = 0;
tokenizer->token_start = 0;
tokenizer->token_end = 0;
+ tokenizer->bi = 0;
+}
+
+struct icu_tokenizer *icu_tokenizer_clone(struct icu_tokenizer *old)
+{
+ uint32_t bufferSize = 10000;
+ UErrorCode status = 0;
+ struct icu_tokenizer * tokenizer
+ = (struct icu_tokenizer *) xmalloc(sizeof(struct icu_tokenizer));
+
+ assert(old);
+ icu_tokenizer_reset(tokenizer, old->action);
+ assert(old->bi);
+ tokenizer->bi = ubrk_safeClone(old->bi, NULL, &bufferSize, &status);
+ if (U_SUCCESS(status))
+ return tokenizer;
+ return tokenizer;
+}
+
+struct icu_tokenizer *icu_tokenizer_create(const char *locale, char action,
+ UErrorCode *status)
+{
+ struct icu_tokenizer * tokenizer
+ = (struct icu_tokenizer *) xmalloc(sizeof(struct icu_tokenizer));
+ icu_tokenizer_reset(tokenizer, action);
switch (tokenizer->action)
{
case 'l':
#include <yaz/log.h>
+#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
UTransliterator * trans;
};
+struct icu_transform *icu_transform_clone(struct icu_transform *old)
+{
+ struct icu_transform *transform
+ = (struct icu_transform *) xmalloc(sizeof(struct icu_transform));
+ UErrorCode status;
+ assert(old);
+ transform->action = old->action;
+ assert(old->trans);
+ transform->trans = utrans_clone(old->trans, &status);
+ assert(transform->trans);
+ return transform;
+}
+
struct icu_transform * icu_transform_create(const char *id, char action,
const char *rules,
UErrorCode *status)
while (icu_chain_next_token(chain, &status))
{
- ;
- /* printf("%d '%s' '%s'\n",
- icu_chain_token_number(chain),
- icu_chain_token_norm(chain),
- icu_chain_token_display(chain)); */
+ yaz_log(YLOG_LOG, "%d '%s' '%s'",
+ icu_chain_token_number(chain),
+ icu_chain_token_norm(chain),
+ icu_chain_token_display(chain));
}
YAZ_CHECK_EQ(icu_chain_token_number(chain), 7);
while (icu_chain_next_token(chain, &status))
{
- ;
- /* printf("%d '%s' '%s'\n",
- icu_chain_token_number(chain),
- icu_chain_token_norm(chain),
- icu_chain_token_display(chain)); */
+ yaz_log(YLOG_LOG, "%d '%s' '%s'",
+ icu_chain_token_number(chain),
+ icu_chain_token_norm(chain),
+ icu_chain_token_display(chain));
}