-/* $Id: pazpar2.c,v 1.29 2007-01-14 17:34:31 adam Exp $ */
+/* $Id: pazpar2.c,v 1.36 2007-01-16 05:29:48 quinn Exp $ */
#include <stdlib.h>
#include <stdio.h>
}
}
-char *normalize_mergekey(char *buf)
+char *normalize_mergekey(char *buf, int skiparticle)
{
char *p = buf, *pout = buf;
+ if (skiparticle)
+ {
+ char firstword[64];
+ char articles[] = "the den der die des an a "; // must end in space
+
+ while (*p && !isalnum(*p))
+ p++;
+ pout = firstword;
+ while (*p && *p != ' ' && pout - firstword < 62)
+ *(pout++) = tolower(*(p++));
+ *(pout++) = ' ';
+ *(pout++) = '\0';
+ if (!strstr(articles, firstword))
+ p = buf;
+ pout = buf;
+ }
+
while (*p)
{
while (*p && !isalnum(*p))
p++;
}
if (buf != pout)
- *pout = '\0';
+ do {
+ *(pout--) = '\0';
+ }
+ while (pout > buf && *pout == ' ');
return buf;
}
{
int i;
+ if (!*value)
+ return;
for (i = 0; i < s->num_termlists; i++)
if (!strcmp(s->termlists[i].name, type))
break;
mergekey_norm = nmem_strdup(se->nmem, (char*) mergekey);
xmlFree(mergekey);
- normalize_mergekey(mergekey_norm);
+ normalize_mergekey(mergekey_norm, 0);
cluster = reclist_insert(se->reclist, res, mergekey_norm, &se->total_merged);
if (!cluster)
if (!strcmp(n->name, "metadata"))
{
struct conf_metadata *md = 0;
+ struct conf_sortkey *sk = 0;
struct record_metadata **wheretoput, *newm;
int imeta;
int first, last;
if (!strcmp(type, service->metadata[imeta].name))
{
md = &service->metadata[imeta];
+ if (md->sortkey_offset >= 0)
+ sk = &service->sortkeys[md->sortkey_offset];
break;
}
if (!md)
newm->next = 0;
if (md->type == Metadata_type_generic)
{
+ char *p;
newm->data.text = nmem_strdup(se->nmem, value);
+ for (p = newm->data.text + strlen(newm->data.text) - 1;
+ p > newm->data.text && strchr(" ,/.", *p); p--)
+ *p = '\0';
+
}
else if (md->type == Metadata_type_year)
{
{
struct record_metadata *mnode;
for (mnode = *wheretoput; mnode; mnode = mnode->next)
- if (!strcmp(mnode->data.text, mnode->data.text))
+ if (!strcmp(mnode->data.text, newm->data.text))
break;
if (!mnode)
{
{
if (!*wheretoput ||
strlen(newm->data.text) > strlen((*wheretoput)->data.text))
- *wheretoput = newm;
+ {
+ *wheretoput = newm;
+ if (sk)
+ {
+ char *s = nmem_strdup(se->nmem, newm->data.text);
+ if (!cluster->sortkeys[md->sortkey_offset])
+ cluster->sortkeys[md->sortkey_offset] =
+ nmem_malloc(se->nmem, sizeof(union data_types));
+ normalize_mergekey(s,
+ (sk->type == Metadata_sortkey_skiparticle));
+ cluster->sortkeys[md->sortkey_offset]->text = s;
+ }
+ }
}
else if (md->merge == Metadata_merge_all || md->merge == Metadata_merge_no)
{
if (!*wheretoput)
{
*wheretoput = newm;
- (*wheretoput)->data.year.year1 = first;
- (*wheretoput)->data.year.year2 = last;
+ (*wheretoput)->data.number.min = first;
+ (*wheretoput)->data.number.max = last;
+ if (sk)
+ cluster->sortkeys[md->sortkey_offset] = &newm->data;
}
else
{
- if (first < (*wheretoput)->data.year.year1)
- (*wheretoput)->data.year.year1 = first;
- if (last > (*wheretoput)->data.year.year2)
- (*wheretoput)->data.year.year2 = last;
+ if (first < (*wheretoput)->data.number.min)
+ (*wheretoput)->data.number.min = first;
+ if (last > (*wheretoput)->data.number.max)
+ (*wheretoput)->data.number.max = last;
+ }
+#ifdef GAGA
+ if (sk)
+ {
+ union data_types *sdata = cluster->sortkeys[md->sortkey_offset];
+ yaz_log(YLOG_LOG, "SK range: %d-%d", sdata->number.min, sdata->number.max);
}
+#endif
}
else
yaz_log(YLOG_WARN, "Don't know how to merge on element name %s", md->name);
return 0;
}
-struct record_cluster **show(struct session *s, int start, int *num, int *total,
- int *sumhits, NMEM nmem_show)
+struct record_cluster **show(struct session *s, struct reclist_sortparms *sp, int start,
+ int *num, int *total, int *sumhits, NMEM nmem_show)
{
struct record_cluster **recs = nmem_malloc(nmem_show, *num
* sizeof(struct record_cluster *));
+ struct reclist_sortparms *spp;
int i;
#if USE_TIMING
yaz_timing_t t = yaz_timing_create();
#endif
- relevance_prepare_read(s->relevance, s->reclist);
+
+ for (spp = sp; spp; spp = spp->next)
+ if (spp->type == Metadata_sortkey_relevance)
+ {
+ relevance_prepare_read(s->relevance, s->reclist);
+ break;
+ }
+ reclist_sort(s->reclist, sp);
*total = s->reclist->num_records;
*sumhits = s->total_hits;