-/* $Id: pazpar2.c,v 1.29 2007-01-14 17:34:31 adam Exp $ */
+/* $Id: pazpar2.c,v 1.39 2007-01-16 23:42:10 quinn Exp $ */
#include <stdlib.h>
#include <stdio.h>
}
}
/*
 * normalize_mergekey -- patch excerpt: lines marked - are the old version,
 * lines marked + are the new version. Part of the main loop body is not
 * shown in this view, so the copy step into pout is not visible here.
 *
 * buf:         NUL-terminated string; the visible code writes terminators
 *              into it through pout and returns the same pointer.
 * skiparticle: new parameter. When non-zero, the first word of buf is
 *              looked up in a fixed article list and, if found, processing
 *              starts after that word instead of at the start of buf.
 *
 * Returns buf.
 */
-char *normalize_mergekey(char *buf)
+char *normalize_mergekey(char *buf, int skiparticle)
{
char *p = buf, *pout = buf;
+ if (skiparticle)
+ {
/* Scratch copy of the first word; 64 bytes = up to 62 characters + space + NUL. */
+ char firstword[64];
+ char articles[] = "the den der die des an a "; // must end in space
+
/* Step over leading characters that are not alphanumeric. */
/* NOTE(review): *p is a plain char handed to isalnum/tolower; the C
 * standard requires a value representable as unsigned char (or EOF), so
 * bytes above 0x7f may misbehave where char is signed -- confirm input. */
+ while (*p && !isalnum(*p))
+ p++;
/* pout is borrowed as the write cursor into firstword. */
+ pout = firstword;
/* Lower-case copy of the first word: stops at a space, at end of string,
 * or after 62 characters. Punctuation inside the word is copied as is. */
+ while (*p && *p != ' ' && pout - firstword < 62)
+ *(pout++) = tolower(*(p++));
/* Append a space so the lookup key ends the same way each list entry does. */
+ *(pout++) = ' ';
+ *(pout++) = '\0';
/* Not found in the list: rewind p so the whole string is processed.
 * NOTE(review): strstr is a substring search, so a word that is only the
 * tail of an entry (for example he inside the) also counts as found, and
 * so does an empty first word (key is a single space) -- confirm intended. */
+ if (!strstr(articles, firstword))
+ p = buf;
/* Unconditional (the if above has no braces): output always restarts at buf. */
+ pout = buf;
+ }
+
/* Main loop over the remaining input; only the skip of non-alphanumeric
 * characters is visible in this excerpt. */
while (*p)
{
while (*p && !isalnum(*p))
p++;
}
/* Terminate only when pout has moved away from buf; if pout == buf no
 * terminator is written here.
 * Old version: single NUL at pout.
 * New version: writes NUL at pout, steps back, and keeps overwriting while
 * the character now under pout is a space, i.e. trailing spaces are removed. */
if (buf != pout)
- *pout = '\0';
+ do {
+ *(pout--) = '\0';
+ }
+ while (pout > buf && *pout == ' ');
return buf;
}
{
int i;
+ if (!*value)
+ return;
for (i = 0; i < s->num_termlists; i++)
if (!strcmp(s->termlists[i].name, type))
break;
struct record_cluster *cluster;
struct session *se = cl->session;
xmlChar *mergekey, *mergekey_norm;
- xmlChar *type;
- xmlChar *value;
+ xmlChar *type = 0;
+ xmlChar *value = 0;
struct conf_service *service = global_parameters.server->service;
if (!xdoc)
mergekey_norm = nmem_strdup(se->nmem, (char*) mergekey);
xmlFree(mergekey);
- normalize_mergekey(mergekey_norm);
+ normalize_mergekey(mergekey_norm, 0);
cluster = reclist_insert(se->reclist, res, mergekey_norm, &se->total_merged);
+ if (global_parameters.dump_records)
+ yaz_log(YLOG_LOG, "Cluster id %d from %s (#%d)", cluster->recid,
+ cl->database->url, cl->records);
if (!cluster)
{
/* no room for record */
}
relevance_newrec(se->relevance, cluster);
- type = value = 0;
for (n = root->children; n; n = n->next)
{
if (type)
if (!strcmp(n->name, "metadata"))
{
struct conf_metadata *md = 0;
+ struct conf_sortkey *sk = 0;
struct record_metadata **wheretoput, *newm;
int imeta;
int first, last;
type = xmlGetProp(n, "type");
value = xmlNodeListGetString(xdoc, n->children, 0);
+
+ if (!type || !value)
+ continue;
+
// First, find out what field we're looking at
for (imeta = 0; imeta < service->num_metadata; imeta++)
if (!strcmp(type, service->metadata[imeta].name))
{
md = &service->metadata[imeta];
+ if (md->sortkey_offset >= 0)
+ sk = &service->sortkeys[md->sortkey_offset];
break;
}
if (!md)
newm->next = 0;
if (md->type == Metadata_type_generic)
{
+ char *p;
newm->data.text = nmem_strdup(se->nmem, value);
+ for (p = newm->data.text + strlen(newm->data.text) - 1;
+ p > newm->data.text && strchr(" ,/.", *p); p--)
+ *p = '\0';
+
}
else if (md->type == Metadata_type_year)
{
{
struct record_metadata *mnode;
for (mnode = *wheretoput; mnode; mnode = mnode->next)
- if (!strcmp(mnode->data.text, mnode->data.text))
+ if (!strcmp(mnode->data.text, newm->data.text))
break;
if (!mnode)
{
{
if (!*wheretoput ||
strlen(newm->data.text) > strlen((*wheretoput)->data.text))
- *wheretoput = newm;
+ {
+ *wheretoput = newm;
+ if (sk)
+ {
+ char *s = nmem_strdup(se->nmem, newm->data.text);
+ if (!cluster->sortkeys[md->sortkey_offset])
+ cluster->sortkeys[md->sortkey_offset] =
+ nmem_malloc(se->nmem, sizeof(union data_types));
+ normalize_mergekey(s,
+ (sk->type == Metadata_sortkey_skiparticle));
+ cluster->sortkeys[md->sortkey_offset]->text = s;
+ }
+ }
}
else if (md->merge == Metadata_merge_all || md->merge == Metadata_merge_no)
{
if (!*wheretoput)
{
*wheretoput = newm;
- (*wheretoput)->data.year.year1 = first;
- (*wheretoput)->data.year.year2 = last;
+ (*wheretoput)->data.number.min = first;
+ (*wheretoput)->data.number.max = last;
+ if (sk)
+ cluster->sortkeys[md->sortkey_offset] = &newm->data;
}
else
{
- if (first < (*wheretoput)->data.year.year1)
- (*wheretoput)->data.year.year1 = first;
- if (last > (*wheretoput)->data.year.year2)
- (*wheretoput)->data.year.year2 = last;
+ if (first < (*wheretoput)->data.number.min)
+ (*wheretoput)->data.number.min = first;
+ if (last > (*wheretoput)->data.number.max)
+ (*wheretoput)->data.number.max = last;
}
+#ifdef GAGA
+ if (sk)
+ {
+ union data_types *sdata = cluster->sortkeys[md->sortkey_offset];
+ yaz_log(YLOG_LOG, "SK range: %d-%d", sdata->number.min, sdata->number.max);
+ }
+#endif
}
else
yaz_log(YLOG_WARN, "Don't know how to merge on element name %s", md->name);
else
yaz_log(YLOG_WARN, "Unexpected element %s in internal record", n->name);
}
+ if (type)
+ xmlFree(type);
+ if (value)
+ xmlFree(value);
xmlFreeDoc(xdoc);
{
Z_NamePlusRecord *npr = rlist->records[i];
+ cl->records++;
if (npr->which != Z_NamePlusRecord_databaseRecord)
{
yaz_log(YLOG_WARN, "Unexpected record type, probably diagnostic");
if (!*r->presentStatus && cl->state != Client_Error)
{
yaz_log(YLOG_DEBUG, "Good Present response");
- cl->records += *r->numberOfRecordsReturned;
ingest_records(cl, r->records);
cl->state = Client_Idle;
}
return 0;
}
-struct record_cluster **show(struct session *s, int start, int *num, int *total,
- int *sumhits, NMEM nmem_show)
+struct record_cluster **show(struct session *s, struct reclist_sortparms *sp, int start,
+ int *num, int *total, int *sumhits, NMEM nmem_show)
{
struct record_cluster **recs = nmem_malloc(nmem_show, *num
* sizeof(struct record_cluster *));
+ struct reclist_sortparms *spp;
int i;
#if USE_TIMING
yaz_timing_t t = yaz_timing_create();
#endif
- relevance_prepare_read(s->relevance, s->reclist);
+
+ for (spp = sp; spp; spp = spp->next)
+ if (spp->type == Metadata_sortkey_relevance)
+ {
+ relevance_prepare_read(s->relevance, s->reclist);
+ break;
+ }
+ reclist_sort(s->reclist, sp);
*total = s->reclist->num_records;
*sumhits = s->total_hits;
" -h [host:]port (REST protocol listener)\n"
" -C cclconfig\n"
" -s simpletargetfile\n"
- " -p hostname[:portno] (HTTP proxy)\n");
+ " -p hostname[:portno] (HTTP proxy)\n"
+ " -d (show internal records)\n");
exit(1);
}
}