#include <yaz/icu.h>
#endif
+typedef struct pp2_charset_s *pp2_charset_t;
+static pp2_charset_t pp2_charset_create_xml(xmlNode *xml_node);
+static pp2_charset_t pp2_charset_create(struct icu_chain * icu_chn);
+static pp2_charset_t pp2_charset_create_a_to_z(void);
+static void pp2_charset_destroy(pp2_charset_t pct);
+static pp2_relevance_token_t pp2_relevance_tokenize(pp2_charset_t pct);
+
/* charset handle */
struct pp2_charset_s {
const char *(*token_next_handler)(pp2_relevance_token_t prt);
const char *(*get_sort_handler)(pp2_relevance_token_t prt);
const char *(*get_display_handler)(pp2_relevance_token_t prt);
- int ref_count;
#if YAZ_HAVE_ICU
struct icu_chain * icu_chn;
UErrorCode icu_sts;
#endif
};
+struct pp2_charset_fact_s { /* charset factory: a reference-counted collection of named charsets */
+ struct pp2_charset_entry *list; /* head of the singly linked list of entries; 0 when empty */
+ int ref_count; /* set to 1 on creation, bumped by incref; storage is released when it reaches 0 */
+};
+
+struct pp2_charset_entry { /* one named charset held by a factory */
+ struct pp2_charset_entry *next; /* following entry in the factory list, or 0 at the tail */
+ pp2_charset_t pct; /* charset for this name; destroyed when the entry is replaced or the factory is freed */
+ char *name; /* xstrdup'ed identifier used for lookup with strcmp; released with xfree */
+};
+
+
+static int pp2_charset_fact_add(pp2_charset_fact_t pft,
+ pp2_charset_t pct, const char *default_id);
+
+/*
+ * Allocate a charset factory holding the four built-in charsets.
+ *
+ * "relevance", "sort" and "mergekey" each get their own a-to-z charset;
+ * "facet" gets a charset created without an ICU chain.  The returned
+ * factory starts with a reference count of 1 and is released with
+ * pp2_charset_fact_destroy.
+ */
+pp2_charset_fact_t pp2_charset_fact_create(void)
+{
+    /* ids that default to the a-to-z charset, in registration order */
+    static const char *const a_to_z_ids[] = {
+        "relevance", "sort", "mergekey", 0
+    };
+    const char *const *idp;
+    pp2_charset_fact_t fact = xmalloc(sizeof(*fact));
+
+    fact->ref_count = 1;
+    fact->list = 0;
+
+    for (idp = a_to_z_ids; *idp; idp++)
+        pp2_charset_fact_add(fact, pp2_charset_create_a_to_z(), *idp);
+    pp2_charset_fact_add(fact, pp2_charset_create(0), "facet");
+    return fact;
+}
+
+/*
+ * Drop one reference to the factory pft.
+ *
+ * A null pft is ignored.  When the last reference goes away every entry
+ * (its charset, its name and the entry itself) is released, followed by
+ * the factory.
+ */
+void pp2_charset_fact_destroy(pp2_charset_fact_t pft)
+{
+    struct pp2_charset_entry *cur;
+
+    if (!pft)
+        return;
+
+    assert(pft->ref_count >= 1);
+    --(pft->ref_count);
+    if (pft->ref_count != 0)
+        return;                 /* still referenced elsewhere */
+
+    cur = pft->list;
+    while (cur)
+    {
+        /* fetch the successor first: cur is freed below */
+        struct pp2_charset_entry *following = cur->next;
+
+        pp2_charset_destroy(cur->pct);
+        xfree(cur->name);
+        xfree(cur);
+        cur = following;
+    }
+    xfree(pft);
+}
+
+/*
+ * Register charset pct in factory pft under the name default_id.
+ *
+ * If an entry with that name already exists its previous charset is
+ * destroyed and replaced; otherwise a new entry (with its own copy of the
+ * name) is pushed onto the front of the list.  The factory stores pct
+ * itself, not a copy.  Always returns 0.
+ */
+int pp2_charset_fact_add(pp2_charset_fact_t pft,
+                         pp2_charset_t pct, const char *default_id)
+{
+    struct pp2_charset_entry *entry = pft->list;
+
+    /* look for an existing entry with the same name */
+    while (entry && strcmp(default_id, entry->name) != 0)
+        entry = entry->next;
+
+    if (entry)
+    {
+        /* redefinition: drop the charset that was registered before */
+        pp2_charset_destroy(entry->pct);
+    }
+    else
+    {
+        entry = xmalloc(sizeof(*entry));
+        entry->name = xstrdup(default_id);
+        entry->next = pft->list;
+        pft->list = entry;
+    }
+    entry->pct = pct;
+    return 0;
+}
+
+/*
+ * Build a charset from an XML definition and register it in factory pft.
+ *
+ * xml_node   - node passed to pp2_charset_create_xml and inspected for an
+ *              "id" attribute.  May be 0 (for instance the children
+ *              pointer of an empty element); that is reported as an error.
+ * default_id - name to register under when xml_node has no "id"
+ *              attribute.  May be 0, in which case the attribute is
+ *              mandatory.
+ *
+ * Returns 0 on success and -1 when xml_node is 0, the charset cannot be
+ * created, or no name is available.
+ */
+int pp2_charset_fact_define(pp2_charset_fact_t pft,
+                            xmlNode *xml_node, const char *default_id)
+{
+    int r;
+    pp2_charset_t pct;
+    xmlChar *id;
+
+    /* The node comes from user-supplied configuration (callers hand over
+       n->children, which is 0 for an empty element), so fail with an
+       error code instead of aborting the process through assert. */
+    if (!xml_node)
+        return -1;
+    pct = pp2_charset_create_xml(xml_node);
+    if (!pct)
+        return -1;
+    /* an explicit id attribute takes precedence over the caller's default */
+    id = xmlGetProp(xml_node, (xmlChar*) "id");
+    if (id)
+        default_id = (const char *) id;
+    if (!default_id)
+    {
+        /* nothing to register the charset under: do not leak it */
+        pp2_charset_destroy(pct);
+        return -1;
+    }
+    r = pp2_charset_fact_add(pft, pct, default_id);
+    /* pp2_charset_fact_add stored its own copy of the name */
+    xmlFree(id);
+    return r;
+}
+
+/*
+ * Take an additional reference to factory pft.  Each reference must later
+ * be given back with pp2_charset_fact_destroy.  pft must not be null.
+ */
+void pp2_charset_fact_incref(pp2_charset_fact_t pft)
+{
+    pft->ref_count += 1;
+}
pp2_charset_t pp2_charset_create_xml(xmlNode *xml_node)
{
#if YAZ_HAVE_ICU
UErrorCode status = U_ZERO_ERROR;
struct icu_chain *chain = 0;
- if (xml_node)
- xml_node = xml_node->children;
while (xml_node && xml_node->type != XML_ELEMENT_NODE)
xml_node = xml_node->next;
chain = icu_chain_xml_config(xml_node, 1, &status);
#endif // YAZ_HAVE_ICU
}
-void pp2_charset_incref(pp2_charset_t pct)
-{
- (pct->ref_count)++;
-}
-
pp2_charset_t pp2_charset_create_a_to_z(void)
{
pp2_charset_t pct = pp2_charset_create(0);
pct->token_next_handler = pp2_relevance_token_null;
pct->get_sort_handler = pp2_get_sort_ascii;
pct->get_display_handler = pp2_get_display_ascii;
- pct->ref_count = 1;
#if YAZ_HAVE_ICU
pct->icu_chn = 0;
if (icu_chn)
void pp2_charset_destroy(pp2_charset_t pct)
{
- if (pct)
- {
- assert(pct->ref_count >= 1);
- --(pct->ref_count);
- if (pct->ref_count == 0)
- {
#if YAZ_HAVE_ICU
- icu_chain_destroy(pct->icu_chn);
+ icu_chain_destroy(pct->icu_chn); /* pct is dereferenced unconditionally: the former NULL guard is gone */
#endif
- xfree(pct);
- }
- }
+ xfree(pct); /* no reference count any more: every call releases the charset */
+}
+
+/*
+ * Create a tokenizer for the charset registered as id in factory pft.
+ *
+ * Returns the result of pp2_relevance_tokenize for the first entry whose
+ * name equals id, or 0 when the factory has no charset of that name.
+ */
+pp2_relevance_token_t pp2_relevance_create(pp2_charset_fact_t pft,
+                                           const char *id)
+{
+    struct pp2_charset_entry *entry = pft->list;
+
+    while (entry)
+    {
+        if (strcmp(id, entry->name) == 0)
+            return pp2_relevance_tokenize(entry->pct);
+        entry = entry->next;
+    }
+    return 0;
}
pp2_relevance_token_t pp2_relevance_tokenize(pp2_charset_t pct)
#include <yaz/wrbuf.h>
#include <yaz/xmltypes.h>
-struct icu_chain;
-
-typedef struct pp2_charset_s *pp2_charset_t;
typedef struct pp2_relevance_token_s *pp2_relevance_token_t;
+typedef struct pp2_charset_fact_s *pp2_charset_fact_t;
-pp2_charset_t pp2_charset_create_xml(xmlNode *xml_node);
-pp2_charset_t pp2_charset_create(struct icu_chain * icu_chn);
-pp2_charset_t pp2_charset_create_a_to_z(void);
-
-void pp2_charset_destroy(pp2_charset_t pct);
-void pp2_charset_incref(pp2_charset_t pct);
-
-pp2_relevance_token_t pp2_relevance_tokenize(pp2_charset_t pct);
void pp2_relevance_first(pp2_relevance_token_t prt,
const char *buf,
int skip_article);
const char *pp2_get_sort(pp2_relevance_token_t prt);
const char *pp2_get_display(pp2_relevance_token_t prt);
+pp2_charset_fact_t pp2_charset_fact_create(void);
+void pp2_charset_fact_destroy(pp2_charset_fact_t pft);
+int pp2_charset_fact_define(pp2_charset_fact_t pft,
+ xmlNode *xml_node, const char *default_id);
+pp2_relevance_token_t pp2_relevance_create(pp2_charset_fact_t pft,
+ const char *id);
+void pp2_charset_fact_incref(pp2_charset_fact_t pft);
#endif
/*
char *p[512];
extract_terms(se->nmem, cn, p);
se->relevance = relevance_create(
- se->service->relevance_pct,
+ se->service->charsets,
se->nmem, (const char **) p);
}
service->z3950_session_timeout = 180;
service->z3950_operation_timeout = 30;
- service->relevance_pct = 0;
- service->sort_pct = 0;
- service->mergekey_pct = 0;
- service->facet_pct = 0;
+ service->charsets = 0;
service->id = service_id ? nmem_strdup(nmem, service_id) : 0;
service->num_metadata = num_metadata;
{
if (!pazpar2_decref(&service->ref_count, service->mutex))
{
- pp2_charset_destroy(service->relevance_pct);
- pp2_charset_destroy(service->sort_pct);
- pp2_charset_destroy(service->mergekey_pct);
- pp2_charset_destroy(service->facet_pct);
+ pp2_charset_fact_destroy(service->charsets);
yaz_mutex_destroy(&service->mutex);
nmem_destroy(service->nmem);
}
}
else if (!strcmp((const char *) n->name, "settings"))
got_settings++;
- else if (!strcmp((const char *) n->name, "relevance"))
+ else if (!strcmp((const char *) n->name, "icu_chain"))
{
- if (service->relevance_pct)
+ if (!service->charsets)
+ service->charsets = pp2_charset_fact_create();
+ if (pp2_charset_fact_define(service->charsets, n, 0))
{
- yaz_log(YLOG_LOG, "relevance may not repeat in service");
+ yaz_log(YLOG_FATAL, "ICU chain definition error");
return 0;
}
- else
- {
- service->relevance_pct = pp2_charset_create_xml(n);
- if (!service->relevance_pct)
- return 0;
- }
- }
- else if (!strcmp((const char *) n->name, "sort"))
- {
- if (service->sort_pct)
- {
- yaz_log(YLOG_LOG, "sort may not repeat in service");
- return 0;
- }
- else
- {
- service->sort_pct = pp2_charset_create_xml(n);
- if (!service->sort_pct)
- return 0;
- }
}
- else if (!strcmp((const char *) n->name, "mergekey"))
- {
- if (service->mergekey_pct)
- {
- yaz_log(YLOG_LOG, "mergekey may not repeat in service");
- return 0;
- }
- else
- {
- service->mergekey_pct = pp2_charset_create_xml(n);
- if (!service->mergekey_pct)
- return 0;
- }
- }
- else if (!strcmp((const char *) n->name, "facet"))
+ else if (!strcmp((const char *) n->name, "relevance")
+ || !strcmp((const char *) n->name, "sort")
+ || !strcmp((const char *) n->name, "mergekey")
+ || !strcmp((const char *) n->name, "facet"))
+
{
- if (service->facet_pct)
+ if (!service->charsets)
+ service->charsets = pp2_charset_fact_create();
+ if (pp2_charset_fact_define(service->charsets,
+ n->children, (const char *) n->name))
{
- yaz_log(YLOG_LOG, "facet may not repeat in service");
+ yaz_log(YLOG_FATAL, "ICU chain definition error");
return 0;
}
- else
- {
- service->facet_pct = pp2_charset_create_xml(n);
- if (!service->facet_pct)
- return 0;
- }
}
else if (!strcmp((const char *) n->name, (const char *) "metadata"))
{
/* use relevance/sort/mergekey/facet from server if not defined
for this service.. */
- if (!s->relevance_pct)
+ if (!s->charsets)
{
- if (server->relevance_pct)
+ if (server->charsets)
{
- s->relevance_pct = server->relevance_pct;
- pp2_charset_incref(s->relevance_pct);
+ s->charsets = server->charsets;
+ pp2_charset_fact_incref(s->charsets);
}
else
- s->relevance_pct = pp2_charset_create_a_to_z();
- }
-
- if (!s->sort_pct)
- {
- if (server->sort_pct)
- {
- s->sort_pct = server->sort_pct;
- pp2_charset_incref(s->sort_pct);
- }
- else
- s->sort_pct = pp2_charset_create_a_to_z();
- }
-
- if (!s->mergekey_pct)
- {
- if (server->mergekey_pct)
- {
- s->mergekey_pct = server->mergekey_pct;
- pp2_charset_incref(s->mergekey_pct);
- }
- else
- s->mergekey_pct = pp2_charset_create_a_to_z();
- }
-
- if (!s->facet_pct)
- {
- if (server->facet_pct)
{
- s->facet_pct = server->facet_pct;
- pp2_charset_incref(s->facet_pct);
+ s->charsets = pp2_charset_fact_create();
}
- else
- s->facet_pct = pp2_charset_create(0);
}
}
server->service = 0;
server->config = config;
server->next = 0;
- server->relevance_pct = 0;
- server->sort_pct = 0;
- server->mergekey_pct = 0;
- server->facet_pct = 0;
+ server->charsets = 0;
server->server_settings = 0;
server->http_server = 0;
server->iochan_man = 0;
if (!(server->server_settings = parse_settings(config, nmem, n)))
return 0;
}
- else if (!strcmp((const char *) n->name, "relevance"))
+ else if (!strcmp((const char *) n->name, "icu_chain"))
{
- server->relevance_pct = pp2_charset_create_xml(n);
- if (!server->relevance_pct)
- return 0;
- }
- else if (!strcmp((const char *) n->name, "sort"))
- {
- server->sort_pct = pp2_charset_create_xml(n);
- if (!server->sort_pct)
- return 0;
- }
- else if (!strcmp((const char *) n->name, "mergekey"))
- {
- server->mergekey_pct = pp2_charset_create_xml(n);
- if (!server->mergekey_pct)
+ if (!server->charsets)
+ server->charsets = pp2_charset_fact_create();
+ if (pp2_charset_fact_define(server->charsets, n, 0))
+ {
+ yaz_log(YLOG_FATAL, "ICU chain definition error");
return 0;
+ }
}
- else if (!strcmp((const char *) n->name, "facet"))
+ else if (!strcmp((const char *) n->name, "relevance")
+ || !strcmp((const char *) n->name, "sort")
+ || !strcmp((const char *) n->name, "mergekey")
+ || !strcmp((const char *) n->name, "facet"))
{
- server->facet_pct = pp2_charset_create_xml(n);
- if (!server->facet_pct)
+ if (!server->charsets)
+ server->charsets = pp2_charset_fact_create();
+ if (pp2_charset_fact_define(server->charsets,
+ n->children, (const char *) n->name))
+ {
+ yaz_log(YLOG_FATAL, "ICU chain definition error");
return 0;
- }
+ }
+ }
else if (!strcmp((const char *) n->name, "service"))
{
char *service_id = (char *)
service_destroy(s);
s = s_next;
}
- pp2_charset_destroy(server->relevance_pct);
- pp2_charset_destroy(server->sort_pct);
- pp2_charset_destroy(server->mergekey_pct);
- pp2_charset_destroy(server->facet_pct);
+ pp2_charset_fact_destroy(server->charsets);
yaz_log(YLOG_LOG, "server_destroy server=%p", server);
http_server_destroy(server->http_server);
}
int ref_count;
/* duplicated from conf_server */
- pp2_charset_t relevance_pct;
- pp2_charset_t sort_pct;
- pp2_charset_t mergekey_pct;
- pp2_charset_t facet_pct;
+ pp2_charset_fact_t charsets;
struct database *databases;
struct conf_server *server;
char *server_settings;
char *server_id;
- pp2_charset_t relevance_pct;
- pp2_charset_t sort_pct;
- pp2_charset_t mergekey_pct;
- pp2_charset_t facet_pct;
+ pp2_charset_fact_t charsets;
struct conf_service *service;
struct conf_server *next;
cluster->term_frequency_vec[0] += length;
}
-struct relevance *relevance_create(pp2_charset_t pct,
+struct relevance *relevance_create(pp2_charset_fact_t pft,
NMEM nmem, const char **terms)
{
struct relevance *res = nmem_malloc(nmem, sizeof(struct relevance));
res->doc_frequency_vec = nmem_malloc(nmem, res->vec_len * sizeof(int));
memset(res->doc_frequency_vec, 0, res->vec_len * sizeof(int));
res->nmem = nmem;
- res->prt = pp2_relevance_tokenize(pct);
+ res->prt = pp2_relevance_create(pft, "relevance");
res->entries = build_word_entries(res->prt, nmem, terms);
return res;
}
struct record_cluster;
struct reclist;
-struct relevance *relevance_create(pp2_charset_t pct,
+struct relevance *relevance_create(pp2_charset_fact_t pft,
NMEM nmem, const char **terms);
void relevance_destroy(struct relevance **rp);
void relevance_newrec(struct relevance *r, struct record_cluster *cluster);
icu_chain_id = (service->metadata + i)->icu_chain;
yaz_log(YLOG_LOG, "icu_chain id=%s", icu_chain_id ? icu_chain_id : "null");
- prt = pp2_relevance_tokenize(service->facet_pct);
-
+ if (!icu_chain_id)
+ icu_chain_id = "facet";
+ prt = pp2_relevance_create(service->charsets, icu_chain_id);
+ if (!prt)
+ {
+ yaz_log(YLOG_FATAL, "Unknown ICU chain '%s' for facet of type '%s'",
+ icu_chain_id, type);
+ wrbuf_destroy(facet_wrbuf);
+ wrbuf_destroy(display_wrbuf);
+ return;
+ }
pp2_relevance_first(prt, value, 0);
while ((facet_component = pp2_relevance_token_next(prt)))
{
{
session_log(s, YLOG_FATAL, "Too many termlists");
wrbuf_destroy(facet_wrbuf);
+ wrbuf_destroy(display_wrbuf);
return;
}
{
const char *norm_str;
pp2_relevance_token_t prt =
- pp2_relevance_tokenize(service->mergekey_pct);
+ pp2_relevance_create(service->charsets, "mergekey");
pp2_relevance_first(prt, (const char *) value, 0);
if (wrbuf_len(norm_wr) > 0)
{
const char *norm_str;
pp2_relevance_token_t prt =
- pp2_relevance_tokenize(service->mergekey_pct);
+ pp2_relevance_create(service->charsets, "mergekey");
pp2_relevance_first(prt, (const char *) mergekey, 0);
while ((norm_str = pp2_relevance_token_next(prt)))
nmem_malloc(se->nmem,
sizeof(union data_types));
- prt = pp2_relevance_tokenize(service->sort_pct);
+ prt = pp2_relevance_create(service->charsets, "sort");
pp2_relevance_first(prt, rec_md->data.text.disp,
skip_article);
</icu_chain>
</sort>
- <mergekey>
- <icu_chain locale="en">
- <tokenize rule="l"/>
- <transform rule="[[:Control:][:WhiteSpace:][:Punctuation:]] Remove"/>
- <casemap rule="l"/>
- </icu_chain>
- </mergekey>
+ <icu_chain id="mergekey" locale="en">
+ <tokenize rule="l"/>
+ <transform rule="[[:Control:][:WhiteSpace:][:Punctuation:]] Remove"/>
+ <casemap rule="l"/>
+ </icu_chain>
- <facet>
- <icu_chain locale="en">
- <transform rule="Title"/>
- </icu_chain>
- </facet>
+ <icu_chain id="facet" locale="en">
+ <transform rule="Title"/>
+ </icu_chain>
+
+ <icu_chain id="mychain" locale="en">
+ <display/>
+ <transform rule="Title"/>
+ </icu_chain>
<service>
<timeout session="30" z3950_operation="20" z3950_session="40"/>
<metadata name="isbn"/>
<metadata name="date" brief="yes" sortkey="numeric" type="year" merge="range"
termlist="yes"/>
- <metadata name="author" brief="yes" termlist="yes" merge="longest" rank="2"/>
+ <metadata name="author" brief="yes" termlist="yes"
+ merge="longest" rank="2" icu_chain="mychain"/>
<metadata name="subject" merge="unique" termlist="yes" rank="3"/>
<metadata name="id"/>
<metadata name="lccn" merge="unique"/>
<activeclients>0</activeclients>
<list name="author">
<term><name>Jack Collins</name><frequency>2</frequency></term>
-<term><name>Mairs, John W</name><frequency>1</frequency></term>
-<term><name>Wood, Helen M</name><frequency>1</frequency></term>
-<term><name>Englund, Carl R</name><frequency>1</frequency></term>
+<term><name>Mairs, John W.</name><frequency>1</frequency></term>
+<term><name>Wood, Helen M.</name><frequency>1</frequency></term>
+<term><name>Englund, Carl R.</name><frequency>1</frequency></term>
</list>
<list name="subject">
<term><name>Radioisotope Scanning</name><frequency>1</frequency></term>