X-Git-Url: http://lists.indexdata.dk/cgi-bin?a=blobdiff_plain;f=src%2Fpazpar2.c;h=2c05b8ce13a7801aa0d3cfa285dbf1c328409746;hb=e161d8d877ae9a61153f2ceb3956a48c3b0ef778;hp=31d58913269e496c6ea891a6f504022ff53897f3;hpb=63aa6af8c87677136cf3e9dca400421e6528340b;p=pazpar2-moved-to-github.git diff --git a/src/pazpar2.c b/src/pazpar2.c index 31d5891..2c05b8c 100644 --- a/src/pazpar2.c +++ b/src/pazpar2.c @@ -1,4 +1,4 @@ -/* $Id: pazpar2.c,v 1.29 2007-01-14 17:34:31 adam Exp $ */ +/* $Id: pazpar2.c,v 1.39 2007-01-16 23:42:10 quinn Exp $ */ #include #include @@ -286,10 +286,27 @@ static void do_searchResponse(IOCHAN i, Z_APDU *a) } } -char *normalize_mergekey(char *buf) +char *normalize_mergekey(char *buf, int skiparticle) { char *p = buf, *pout = buf; + if (skiparticle) + { + char firstword[64]; + char articles[] = "the den der die des an a "; // must end in space + + while (*p && !isalnum(*p)) + p++; + pout = firstword; + while (*p && *p != ' ' && pout - firstword < 62) + *(pout++) = tolower(*(p++)); + *(pout++) = ' '; + *(pout++) = '\0'; + if (!strstr(articles, firstword)) + p = buf; + pout = buf; + } + while (*p) { while (*p && !isalnum(*p)) @@ -302,7 +319,10 @@ char *normalize_mergekey(char *buf) p++; } if (buf != pout) - *pout = '\0'; + do { + *(pout--) = '\0'; + } + while (pout > buf && *pout == ' '); return buf; } @@ -347,6 +367,8 @@ static void add_facet(struct session *s, const char *type, const char *value) { int i; + if (!*value) + return; for (i = 0; i < s->num_termlists; i++) if (!strcmp(s->termlists[i].name, type)) break; @@ -469,8 +491,8 @@ static struct record *ingest_record(struct client *cl, Z_External *rec) struct record_cluster *cluster; struct session *se = cl->session; xmlChar *mergekey, *mergekey_norm; - xmlChar *type; - xmlChar *value; + xmlChar *type = 0; + xmlChar *value = 0; struct conf_service *service = global_parameters.server->service; if (!xdoc) @@ -492,9 +514,12 @@ static struct record *ingest_record(struct client *cl, Z_External *rec) mergekey_norm = nmem_strdup(se->nmem, (char*) mergekey); xmlFree(mergekey); - normalize_mergekey(mergekey_norm); + normalize_mergekey(mergekey_norm, 0); cluster = reclist_insert(se->reclist, res, mergekey_norm, &se->total_merged); + if (global_parameters.dump_records) + yaz_log(YLOG_LOG, "Cluster id %d from %s (#%d)", cluster->recid, + cl->database->url, cl->records); if (!cluster) { /* no room for record */ @@ -503,7 +528,6 @@ static struct record *ingest_record(struct client *cl, Z_External *rec) } relevance_newrec(se->relevance, cluster); - type = value = 0; for (n = root->children; n; n = n->next) { if (type) @@ -517,17 +541,24 @@ static struct record *ingest_record(struct client *cl, Z_External *rec) if (!strcmp(n->name, "metadata")) { struct conf_metadata *md = 0; + struct conf_sortkey *sk = 0; struct record_metadata **wheretoput, *newm; int imeta; int first, last; type = xmlGetProp(n, "type"); value = xmlNodeListGetString(xdoc, n->children, 0); + + if (!type || !value) + continue; + // First, find out what field we're looking at for (imeta = 0; imeta < service->num_metadata; imeta++) if (!strcmp(type, service->metadata[imeta].name)) { md = &service->metadata[imeta]; + if (md->sortkey_offset >= 0) + sk = &service->sortkeys[md->sortkey_offset]; break; } if (!md) @@ -547,7 +578,12 @@ static struct record *ingest_record(struct client *cl, Z_External *rec) newm->next = 0; if (md->type == Metadata_type_generic) { + char *p; newm->data.text = nmem_strdup(se->nmem, value); + for (p = newm->data.text + strlen(newm->data.text) - 1; + p > newm->data.text && strchr(" ,/.", *p); p--) + *p = '\0'; + } else if (md->type == Metadata_type_year) { @@ -568,7 +604,7 @@ static struct record *ingest_record(struct client *cl, Z_External *rec) { struct record_metadata *mnode; for (mnode = *wheretoput; mnode; mnode = mnode->next) - if (!strcmp(mnode->data.text, mnode->data.text)) + if (!strcmp(mnode->data.text, newm->data.text)) break; if (!mnode) { @@ -580,7 +616,19 @@ static struct record *ingest_record(struct client *cl, Z_External *rec) { if (!*wheretoput || strlen(newm->data.text) > strlen((*wheretoput)->data.text)) - *wheretoput = newm; + { + *wheretoput = newm; + if (sk) + { + char *s = nmem_strdup(se->nmem, newm->data.text); + if (!cluster->sortkeys[md->sortkey_offset]) + cluster->sortkeys[md->sortkey_offset] = + nmem_malloc(se->nmem, sizeof(union data_types)); + normalize_mergekey(s, + (sk->type == Metadata_sortkey_skiparticle)); + cluster->sortkeys[md->sortkey_offset]->text = s; + } + } } else if (md->merge == Metadata_merge_all || md->merge == Metadata_merge_no) { @@ -593,16 +641,25 @@ static struct record *ingest_record(struct client *cl, Z_External *rec) if (!*wheretoput) { *wheretoput = newm; - (*wheretoput)->data.year.year1 = first; - (*wheretoput)->data.year.year2 = last; + (*wheretoput)->data.number.min = first; + (*wheretoput)->data.number.max = last; + if (sk) + cluster->sortkeys[md->sortkey_offset] = &newm->data; } else { - if (first < (*wheretoput)->data.year.year1) - (*wheretoput)->data.year.year1 = first; - if (last > (*wheretoput)->data.year.year2) - (*wheretoput)->data.year.year2 = last; + if (first < (*wheretoput)->data.number.min) + (*wheretoput)->data.number.min = first; + if (last > (*wheretoput)->data.number.max) + (*wheretoput)->data.number.max = last; } +#ifdef GAGA + if (sk) + { + union data_types *sdata = cluster->sortkeys[md->sortkey_offset]; + yaz_log(YLOG_LOG, "SK range: %d-%d", sdata->number.min, sdata->number.max); + } +#endif } else yaz_log(YLOG_WARN, "Don't know how to merge on element name %s", md->name); @@ -618,6 +675,10 @@ static struct record *ingest_record(struct client *cl, Z_External *rec) else yaz_log(YLOG_WARN, "Unexpected element %s in internal record", n->name); } + if (type) + xmlFree(type); + if (value) + xmlFree(value); xmlFreeDoc(xdoc); @@ -644,6 +705,7 @@ static void ingest_records(struct client *cl, Z_Records *r) { Z_NamePlusRecord *npr = rlist->records[i]; + cl->records++; if (npr->which != Z_NamePlusRecord_databaseRecord) { yaz_log(YLOG_WARN, "Unexpected record type, probably diagnostic"); @@ -685,7 +747,6 @@ static void do_presentResponse(IOCHAN i, Z_APDU *a) if (!*r->presentStatus && cl->state != Client_Error) { yaz_log(YLOG_DEBUG, "Good Present response"); - cl->records += *r->numberOfRecordsReturned; ingest_records(cl, r->records); cl->state = Client_Idle; } @@ -1321,16 +1382,24 @@ struct record_cluster *show_single(struct session *s, int id) return 0; } -struct record_cluster **show(struct session *s, int start, int *num, int *total, - int *sumhits, NMEM nmem_show) +struct record_cluster **show(struct session *s, struct reclist_sortparms *sp, int start, + int *num, int *total, int *sumhits, NMEM nmem_show) { struct record_cluster **recs = nmem_malloc(nmem_show, *num * sizeof(struct record_cluster *)); + struct reclist_sortparms *spp; int i; #if USE_TIMING yaz_timing_t t = yaz_timing_create(); #endif - relevance_prepare_read(s->relevance, s->reclist); + + for (spp = sp; spp; spp = spp->next) + if (spp->type == Metadata_sortkey_relevance) + { + relevance_prepare_read(s->relevance, s->reclist); + break; + } + reclist_sort(s->reclist, sp); *total = s->reclist->num_records; *sumhits = s->total_hits; @@ -1484,7 +1553,8 @@ int main(int argc, char **argv) " -h [host:]port (REST protocol listener)\n" " -C cclconfig\n" " -s simpletargetfile\n" - " -p hostname[:portno] (HTTP proxy)\n"); + " -p hostname[:portno] (HTTP proxy)\n" + " -d (show internal records)\n"); exit(1); } }