#include <yaz/querytowrbuf.h>
#include <yaz/oid_db.h>
#include <yaz/snprintf.h>
+#include <yaz/xml_get.h>
#define USE_TIMING 0
#if USE_TIMING
session_log(s, YLOG_DEBUG, "Session unlock by %s", caller);
}
-static void session_normalize_facet(struct session *s,
- const char *type, const char *value,
- WRBUF display_wrbuf, WRBUF facet_wrbuf)
+static int run_icu(struct session *s, const char *icu_chain_id,
+ const char *value,
+ WRBUF norm_wr, WRBUF disp_wr)
{
- struct conf_service *service = s->service;
- pp2_charset_token_t prt;
const char *facet_component;
- int i;
- const char *icu_chain_id = 0;
-
- for (i = 0; i < service->num_metadata; i++)
- if (!strcmp((service->metadata + i)->name, type))
- icu_chain_id = (service->metadata + i)->facetrule;
- if (!icu_chain_id)
- icu_chain_id = "facet";
- prt = pp2_charset_token_create(service->charsets, icu_chain_id);
+ struct conf_service *service = s->service;
+ pp2_charset_token_t prt =
+ pp2_charset_token_create(service->charsets, icu_chain_id);
if (!prt)
{
session_log(s, YLOG_FATAL,
- "Unknown ICU chain '%s' for facet of type '%s'",
- icu_chain_id, type);
- wrbuf_destroy(facet_wrbuf);
- wrbuf_destroy(display_wrbuf);
- return;
+ "Unknown ICU chain '%s'", icu_chain_id);
+ return 0;
}
pp2_charset_token_first(prt, value, 0);
while ((facet_component = pp2_charset_token_next(prt)))
const char *display_component;
if (*facet_component)
{
- if (wrbuf_len(facet_wrbuf))
- wrbuf_puts(facet_wrbuf, " ");
- wrbuf_puts(facet_wrbuf, facet_component);
+ if (wrbuf_len(norm_wr))
+ wrbuf_puts(norm_wr, " ");
+ wrbuf_puts(norm_wr, facet_component);
}
display_component = pp2_get_display(prt);
if (display_component)
{
- if (wrbuf_len(display_wrbuf))
- wrbuf_puts(display_wrbuf, " ");
- wrbuf_puts(display_wrbuf, display_component);
+ if (wrbuf_len(disp_wr))
+ wrbuf_puts(disp_wr, " ");
+ wrbuf_puts(disp_wr, display_component);
}
}
pp2_charset_token_destroy(prt);
+ return 1;
+}
+
+static void session_normalize_facet(struct session *s,
+ const char *type, const char *value,
+ WRBUF display_wrbuf, WRBUF facet_wrbuf)
+{ /* Choose the ICU chain for facet `type` and run `value` through it.
+   * s             - session; its service supplies the metadata table.
+   * type          - metadata field name used to look up the facet rule.
+   * value         - raw facet text to be tokenized.
+   * display_wrbuf - receives the space-joined display tokens.
+   * facet_wrbuf   - receives the space-joined normalized tokens.
+   * Nothing is returned; the result of run_icu() is not inspected here. */
+ struct conf_service *service = s->service;
+ int i;
+ const char *icu_chain_id = 0;
+
+ for (i = 0; i < service->num_metadata; i++) /* no break: the last entry named `type` wins */
+ if (!strcmp((service->metadata + i)->name, type))
+ icu_chain_id = (service->metadata + i)->facetrule;
+ if (!icu_chain_id) /* no matching entry, or its facetrule is null */
+ icu_chain_id = "facet";
+
+ run_icu(s, icu_chain_id, value, facet_wrbuf, display_wrbuf); /* run_icu takes (norm, disp): facet_wrbuf is the normalized buffer */
}
void add_facet(struct session *s, const char *type, const char *value, int count)
return session;
}
-const char * client_get_suggestions_xml(struct client *cl, WRBUF wrbuf);
-
static struct hitsbytarget *hitsbytarget_nb(struct session *se,
int *count, NMEM nmem)
{
session_settings_dump(se, client_get_database(cl), w);
res[*count].settings_xml = nmem_strdup(nmem, wrbuf_cstr(w));
wrbuf_rewind(w);
- wrbuf_puts(w, "");
- res[*count].suggestions_xml = nmem_strdup(nmem, client_get_suggestions_xml(cl, w));
+ res[*count].suggestions_xml =
+ nmem_strdup(nmem, client_get_suggestions_xml(cl, w));
wrbuf_destroy(w);
(*count)++;
}
void perform_termlist(struct http_channel *c, struct session *se,
const char *name, int num, int version)
{
- int i, j;
+ int j;
NMEM nmem_tmp = nmem_create();
char **names;
int num_names = 0;
}
static struct record_metadata *record_metadata_init(
- NMEM nmem, const char *value, enum conf_metadata_type type,
+ NMEM nmem, const char *value, const char *norm,
+ enum conf_metadata_type type,
struct _xmlAttr *attr)
{
struct record_metadata *rec_md = record_metadata_create(nmem);
{
case Metadata_type_generic:
case Metadata_type_skiparticle:
- if (strstr(value, "://")) /* looks like a URL */
+ if (norm)
+ {
rec_md->data.text.disp = nmem_strdup(nmem, value);
+ rec_md->data.text.norm = nmem_strdup(nmem, norm);
+ }
else
- rec_md->data.text.disp =
- normalize7bit_generic(nmem_strdup(nmem, value), " ,/.:([");
+ {
+ if (strstr(value, "://")) /* looks like a URL */
+ rec_md->data.text.disp = nmem_strdup(nmem, value);
+ else
+ rec_md->data.text.disp =
+ normalize7bit_generic(nmem_strdup(nmem, value), " ,/.:([");
+ rec_md->data.text.norm = rec_md->data.text.disp;
+ }
rec_md->data.text.sort = 0;
rec_md->data.text.snippet = 0;
break;
continue;
if (!strcmp((const char *) n->name, "metadata"))
{
- xmlChar *type = xmlGetProp(n, (xmlChar *) "type");
+ const char *type = yaz_xml_get_prop(n, "type");
if (type == NULL) {
yaz_log(YLOG_FATAL, "Missing type attribute on metadata element. Skipping!");
}
if (value)
xmlFree(value);
}
- xmlFree(type);
}
}
return no_found;
{
char *mergekey_norm = 0;
WRBUF norm_wr = wrbuf_alloc();
- xmlChar *mergekey;
+ const char *mergekey;
if (session_mergekey)
{
for (i = 0; i < num; i++)
get_mergekey_from_doc(doc, root, values[i], service, norm_wr);
}
- else if ((mergekey = xmlGetProp(root, (xmlChar *) "mergekey")))
+ else if ((mergekey = yaz_xml_get_prop(root, "mergekey")))
{
- mergekey_norm_wr(service->charsets, norm_wr, (const char *) mergekey);
- xmlFree(mergekey);
+ mergekey_norm_wr(service->charsets, norm_wr, mergekey);
}
else
{
continue;
if (!strcmp((const char *) n->name, "metadata"))
{
- xmlChar *type = xmlGetProp(n, (xmlChar *) "type");
+ const char *type = yaz_xml_get_prop(n, "type");
if (type)
{
size_t len;
}
xmlFree(value);
}
- xmlFree(type);
}
}
}
}
static int ingest_to_cluster(struct client *cl,
+ WRBUF wrbuf_disp,
+ WRBUF wrbuf_norm,
xmlDoc *xdoc,
xmlNode *root,
int record_no,
{
int ret = 0;
struct session *se = client_get_session(cl);
+ WRBUF wrbuf_disp, wrbuf_norm;
if (!check_record_filter(root, sdb))
{
record_no, sdb->database->id);
return 0;
}
+ wrbuf_disp = wrbuf_alloc();
+ wrbuf_norm = wrbuf_alloc();
session_enter(se, "ingest_sub_record");
if (client_get_session(cl) == se && se->relevance)
- ret = ingest_to_cluster(cl, xdoc, root, record_no, mergekeys);
+ ret = ingest_to_cluster(cl, wrbuf_disp, wrbuf_norm,
+ xdoc, root, record_no, mergekeys);
session_leave(se, "ingest_sub_record");
-
+ wrbuf_destroy(wrbuf_norm);
+ wrbuf_destroy(wrbuf_disp);
return ret;
}
}
static int ingest_to_cluster(struct client *cl,
+ WRBUF wrbuf_disp,
+ WRBUF wrbuf_norm,
xmlDoc *xdoc,
xmlNode *root,
int record_no,
struct record_metadata_attr *merge_keys)
{
xmlNode *n;
- xmlChar *type = 0;
- xmlChar *value = 0;
struct session *se = client_get_session(cl);
struct conf_service *service = se->service;
int term_factor = 1;
for (n = root->children; n; n = n->next)
{
- if (type)
- xmlFree(type);
- if (value)
- xmlFree(value);
- type = value = 0;
-
if (n->type != XML_ELEMENT_NODE)
continue;
if (!strcmp((const char *) n->name, "metadata"))
struct record_metadata **wheretoput = 0;
struct record_metadata *rec_md = 0;
int md_field_id = -1;
+ xmlChar *value0;
+ const char *type = yaz_xml_get_prop(n, "type");
- type = xmlGetProp(n, (xmlChar *) "type");
- value = xmlNodeListGetString(xdoc, n->children, 1);
if (!type)
continue;
- if (!value || !*value)
- {
- xmlChar *empty = xmlGetProp(n, (xmlChar *) "empty");
- if (!empty)
- continue;
- if (value)
- xmlFree(value);
- value = empty;
- }
+
md_field_id
= conf_service_metadata_field_id(service, (const char *) type);
if (md_field_id < 0)
continue;
}
+ /* Load this element's text into wrbuf_disp. When the text is missing
+  * or empty, fall back to the "empty" attribute; skip the element when
+  * that attribute is absent too. */
+ wrbuf_rewind(wrbuf_disp);
+ value0 = xmlNodeListGetString(xdoc, n->children, 1);
+ if (!value0 || !*value0)
+ {
+ const char *empty = yaz_xml_get_prop(n, "empty");
+ if (!empty)
+ {
+ /* value0 can be an allocated empty string; release it before
+  * skipping, as the second metadata loop does, to avoid a leak. */
+ if (value0)
+ xmlFree(value0);
+ continue;
+ }
+ wrbuf_puts(wrbuf_disp, (const char *) empty);
+ }
+ else
+ {
+ wrbuf_puts(wrbuf_disp, (const char *) value0);
+ }
+ /* wrbuf_disp now holds its own copy, so the libxml2 string can go. */
+ if (value0)
+ xmlFree(value0);
ser_md = &service->metadata[md_field_id];
// non-merged metadata
- rec_md = record_metadata_init(se->nmem, (const char *) value,
+ rec_md = record_metadata_init(se->nmem, wrbuf_cstr(wrbuf_disp), 0,
ser_md->type, n->properties);
if (!rec_md)
{
session_log(se, YLOG_WARN, "bad metadata data '%s' "
- "for element '%s'", value, type);
+ "for element '%s'", wrbuf_cstr(wrbuf_disp), type);
continue;
}
{
WRBUF w = wrbuf_alloc();
if (relevance_snippet(se->relevance,
- (char*) value, ser_md->name, w))
+ wrbuf_cstr(wrbuf_disp), ser_md->name, w))
rec_md->data.text.snippet = nmem_strdup(se->nmem,
wrbuf_cstr(w));
wrbuf_destroy(w);
if (check_limit_local(cl, record, record_no))
{
- if (type)
- xmlFree(type);
- if (value)
- xmlFree(value);
return -2;
}
cluster = reclist_insert(se->reclist, se->relevance, service, record,
merge_keys, &se->total_merged);
if (!cluster)
{
- if (type)
- xmlFree(type);
- if (value)
- xmlFree(value);
return 0; // complete match with existing record
}
// now parsing XML record and adding data to cluster or record metadata
for (n = root->children; n; n = n->next)
{
- pp2_charset_token_t prt;
- if (type)
- xmlFree(type);
- if (value)
- xmlFree(value);
- type = value = 0;
-
if (n->type != XML_ELEMENT_NODE)
continue;
if (!strcmp((const char *) n->name, "metadata"))
int md_field_id = -1;
int sk_field_id = -1;
const char *rank = 0;
- xmlChar *xml_rank = 0;
-
- type = xmlGetProp(n, (xmlChar *) "type");
- value = xmlNodeListGetString(xdoc, n->children, 1);
+ const char *xml_rank = 0;
+ const char *type = 0;
+ xmlChar *value0;
- if (!type || !value || !*value)
+ type = yaz_xml_get_prop(n, "type");
+ if (!type)
continue;
md_field_id
ser_sk = &service->sortkeys[sk_field_id];
}
- // merged metadata
- rec_md = record_metadata_init(se->nmem, (const char *) value,
- ser_md->type, 0);
+ wrbuf_rewind(wrbuf_disp);
+ wrbuf_rewind(wrbuf_norm);
- // see if the field was not in cluster already (from beginning)
+ value0 = xmlNodeListGetString(xdoc, n->children, 1);
+ if (!value0 || !*value0)
+ {
+ if (value0)
+ xmlFree(value0);
+ continue;
+ }
+
+ if (ser_md->icurule)
+ {
+ run_icu(se, ser_md->icurule, (const char *) value0,
+ wrbuf_norm, wrbuf_disp);
+ yaz_log(YLOG_LOG, "run_icu input=%s norm=%s disp=%s",
+ (const char *) value0,
+ wrbuf_cstr(wrbuf_norm), wrbuf_cstr(wrbuf_disp));
+ rec_md = record_metadata_init(se->nmem, wrbuf_cstr(wrbuf_disp),
+ wrbuf_cstr(wrbuf_norm),
+ ser_md->type, 0);
+ }
+ else
+ {
+ wrbuf_puts(wrbuf_disp, (const char *) value0);
+ rec_md = record_metadata_init(se->nmem, wrbuf_cstr(wrbuf_disp),
+ 0,
+ ser_md->type, 0);
+ }
+ xmlFree(value0);
+
+ // see if the field was not in cluster already (from beginning)
if (!rec_md)
continue;
}
else
{
- xml_rank = xmlGetProp(n, (xmlChar *) "rank");
+ xml_rank = yaz_xml_get_prop(n, "rank");
rank = xml_rank ? (const char *) xml_rank : ser_md->rank;
}
{
while (*wheretoput)
{
- if (!strcmp((const char *) (*wheretoput)->data.text.disp,
- rec_md->data.text.disp))
+ if (!strcmp((const char *) (*wheretoput)->data.text.norm,
+ rec_md->data.text.norm))
break;
wheretoput = &(*wheretoput)->next;
}
else if (ser_md->merge == Metadata_merge_longest)
{
if (!*wheretoput
- || strlen(rec_md->data.text.disp)
- > strlen((*wheretoput)->data.text.disp))
+ || strlen(rec_md->data.text.norm)
+ > strlen((*wheretoput)->data.text.norm))
{
*wheretoput = rec_md;
if (ser_sk)
{
+ pp2_charset_token_t prt;
const char *sort_str = 0;
int skip_article =
ser_sk->type == Metadata_type_skiparticle;
if (rank)
{
relevance_countwords(se->relevance, cluster,
- (char *) value, rank, ser_md->name);
+ wrbuf_cstr(wrbuf_disp),
+ rank, ser_md->name);
}
// construct facets ... unless the client already has reported them
if (ser_md->termlist && !client_has_facet(cl, (char *) type))
}
}
else
- add_facet(se, (char *) type, (char *) value, term_factor);
+ add_facet(se, type, wrbuf_cstr(wrbuf_disp), term_factor);
}
-
- // cleaning up
- if (xml_rank)
- xmlFree(xml_rank);
- xmlFree(type);
- xmlFree(value);
- type = value = 0;
}
else
{
se->number_of_warnings_unknown_elements++;
}
}
- if (type)
- xmlFree(type);
- if (value)
- xmlFree(value);
-
nmem_destroy(ingest_nmem);
xfree(metadata0);
relevance_donerecord(se->relevance, cluster);