From 77b4d51d81340f3374ae8a97aea0177a0fbcd336 Mon Sep 17 00:00:00 2001 From: Sebastian Hammer Date: Mon, 8 Jan 2007 18:32:35 +0000 Subject: [PATCH] Metadata elements dynamically created from XSLT normalization output. Configure file controls merging and termlist extraction. Merge criteria supported: all-fields, longest (strlen), unique. Configure also controls which fields are included in overview format (full view not yet implemented). Protocol change: Metadata field names now prefixed by 'md-' in the webservice protocol (i.e. md-title). Implemented in test1 prototype. Todo: Date range normalization/merge, sortkey extraction, full record view. --- PROTOCOL | 6 +-- README | 3 +- etc/marc21.xsl | 19 +++++--- etc/pazpar2.cfg | 10 ++-- src/config.c | 130 ++++++++++++++++++++++++++++++++++++++++++++++---- src/config.h | 12 ++--- src/http_command.c | 32 +++++++++++-- src/pazpar2.c | 131 +++++++++++++++++++++++++++++++++++++++------------ src/pazpar2.h | 32 ++++++++----- src/reclists.c | 55 ++++++++++++--------- src/reclists.h | 7 +-- src/relevance.c | 16 +++---- src/relevance.h | 6 +-- src/termlists.c | 6 ++- www/test1/search.js | 4 +- 15 files changed, 348 insertions(+), 121 deletions(-) diff --git a/PROTOCOL b/PROTOCOL index 4ddfd03..0999252 100644 --- a/PROTOCOL +++ b/PROTOCOL @@ -109,14 +109,14 @@ Output: 0 2 - How to program a computer, by Jack Collins + How to program a computer, by Jack Collins 2 - + <md-title> Computer processing of dynamic images from an Anger scintillation camera : the proceedings of a workshop / - + diff --git a/README b/README index 2d7d528..3a29464 100644 --- a/README +++ b/README @@ -1,7 +1,6 @@ How to use this: -Usage: pazpar2 -h [listen-host:]listen-port -p host-to-proxy -s targetfile \ - -x normalization-stylesheet +Usage: pazpar2 -f configfile -h [listen-host:]listen-port -p host-to-proxy -s targetfile The host-to-proxy is the webserver where the user interface script lives. It is used both for domain-name lookup and the contents of diff --git a/etc/marc21.xsl b/etc/marc21.xsl index bcb30e7..cd20702 100644 --- a/etc/marc21.xsl +++ b/etc/marc21.xsl @@ -16,26 +16,33 @@ - + + - + - + - + - + + - diff --git a/etc/pazpar2.cfg b/etc/pazpar2.cfg index 3ccbda5..2b84e08 100644 --- a/etc/pazpar2.cfg +++ b/etc/pazpar2.cfg @@ -1,16 +1,14 @@ - - - + + + + diff --git a/src/config.c b/src/config.c index 2c76882..46c3f64 100644 --- a/src/config.c +++ b/src/config.c @@ -1,4 +1,4 @@ -/* $Id: config.c,v 1.4 2007-01-08 12:43:41 adam Exp $ */ +/* $Id: config.c,v 1.5 2007-01-08 18:32:35 quinn Exp $ */ #include @@ -30,25 +30,125 @@ static struct conf_service *parse_service(xmlNode *node) { xmlNode *n; struct conf_service *r = nmem_malloc(nmem, sizeof(struct conf_service)); + int num_metadata = 0; + int md_node = 0; - r->termlists = 0; + // Allocate array of conf metadata structs, if necessary + for (n = node->children; n; n = n->next) + if (n->type == XML_ELEMENT_NODE && !strcmp(n->name, "metadata")) + num_metadata++; + if (num_metadata) + r->metadata = nmem_malloc(nmem, sizeof(struct conf_metadata) * num_metadata); + r->num_metadata = num_metadata; for (n = node->children; n; n = n->next) { if (n->type != XML_ELEMENT_NODE) continue; - if (!strcmp(n->name, "termlist")) + if (!strcmp(n->name, "metadata")) { - struct conf_termlist *tl = nmem_malloc(nmem, sizeof(struct conf_termlist)); + struct conf_metadata *md = &r->metadata[md_node]; xmlChar *name = xmlGetProp(n, "name"); + xmlChar *brief = xmlGetProp(n, "brief"); + xmlChar *sortkey = xmlGetProp(n, "sortkey"); + xmlChar *merge = xmlGetProp(n, "merge"); + xmlChar *type = xmlGetProp(n, "type"); + xmlChar *termlist = xmlGetProp(n, "termlist"); + if (!name) { - yaz_log(YLOG_WARN, "Missing name attribute in termlist"); - continue; + yaz_log(YLOG_FATAL, "Must specify name in metadata element"); + return 0; + } + md->name = nmem_strdup(nmem, name); + if (brief) + { + if (!strcmp(brief, "yes")) + md->brief = 1; + else if (strcmp(brief, "no")) + { + yaz_log(YLOG_FATAL, "metadata/brief must be yes or no"); + return 0; + } + } + else + md->brief = 0; + + if (termlist) + { + if (!strcmp(termlist, "yes")) + md->termlist = 1; + else if (strcmp(termlist, "no")) + { + yaz_log(YLOG_FATAL, "metadata/termlist must be yes or no"); + return 0; + } + } + else + md->termlist = 0; + + if (type) + { + if (!strcmp(type, "generic")) + md->type = Metadata_type_generic; + else if (!strcmp(type, "integer")) + md->type = Metadata_type_integer; + else if (!strcmp(type, "year")) + md->type = Metadata_type_year; + else + { + yaz_log(YLOG_FATAL, "Unknown value for metadata/type: %s", type); + return 0; + } + } + md->type = Metadata_type_generic; + + if (sortkey) + { + if (!strcmp(sortkey, "no")) + md->sortkey = Metadata_sortkey_no; + else if (!strcmp(sortkey, "numeric")) + md->sortkey = Metadata_sortkey_numeric; + else if (!strcmp(sortkey, "range")) + md->sortkey = Metadata_sortkey_range; + else if (!strcmp(sortkey, "skiparticle")) + md->sortkey = Metadata_sortkey_skiparticle; + else + { + yaz_log(YLOG_FATAL, "Unknown sortkey in metadata element: %s", sortkey); + return 0; + } } - tl->name = nmem_strdup(nmem, name); - tl->next = r->termlists; - r->termlists = tl; + else + md->sortkey = Metadata_sortkey_no; + + if (merge) + { + if (!strcmp(merge, "no")) + md->merge = Metadata_merge_no; + else if (!strcmp(merge, "unique")) + md->merge = Metadata_merge_unique; + else if (!strcmp(merge, "longest")) + md->merge = Metadata_merge_longest; + else if (!strcmp(merge, "range")) + md->merge = Metadata_merge_range; + else if (!strcmp(merge, "all")) + md->merge = Metadata_merge_all; + else + { + yaz_log(YLOG_FATAL, "Unknown value for metadata/merge: %s", merge); + return 0; + } + } + else + md->merge = Metadata_merge_no; + + xmlFree(name); + xmlFree(brief); + xmlFree(sortkey); + xmlFree(merge); + xmlFree(termlist); + md_node++; } else { @@ -83,6 +183,8 @@ static struct conf_server *parse_server(xmlNode *node) r->port = atoi(port); if (host) r->host = nmem_strdup(nmem, host); + xmlFree(port); + xmlFree(host); } else if (!strcmp(n->name, "proxy")) { @@ -92,6 +194,8 @@ static struct conf_server *parse_server(xmlNode *node) r->proxy_port = atoi(port); if (host) r->proxy_host = nmem_strdup(nmem, host); + xmlFree(port); + xmlFree(host); } else if (!strcmp(n->name, "service")) { @@ -204,6 +308,10 @@ static struct conf_retrievalprofile *parse_retrievalprofile(xmlNode *node) return 0; } } + xmlFree(name); + xmlFree(format); + xmlFree(encoding); + xmlFree(mapto); } else if (!strcmp(n->name, "map")) { @@ -234,6 +342,10 @@ static struct conf_retrievalprofile *parse_retrievalprofile(xmlNode *node) } *rm = m; rm = &m->next; + xmlFree(type); + xmlFree(charset); + xmlFree(format); + xmlFree(stylesheet); } else { diff --git a/src/config.h b/src/config.h index e3dda3d..346707a 100644 --- a/src/config.h +++ b/src/config.h @@ -5,17 +5,12 @@ #include #include -struct conf_termlist -{ - char *name; - struct conf_termlist *next; -}; - // Describes known metadata elements and how they are to be manipulated struct conf_metadata { char *name; // The name of this element. Output by normalization stylesheet int brief; // Is this element to be returned in the brief format? + int termlist;// Is this field to be treated as a termlist for browsing? enum { Metadata_type_generic, // Generic text field @@ -34,13 +29,14 @@ struct conf_metadata Metadata_merge_no, // Don't merge Metadata_merge_unique, // Include unique elements in merged block Metadata_merge_longest, // Include the longest (strlen) value - Metadata_merge_range // Store value as a range of lowest-highest + Metadata_merge_range, // Store value as a range of lowest-highest + Metadata_merge_all // Just include all elements found } merge; }; struct conf_service { - struct conf_termlist *termlists; + int num_metadata; struct conf_metadata *metadata; }; diff --git a/src/http_command.c b/src/http_command.c index 4068cc8..66c903c 100644 --- a/src/http_command.c +++ b/src/http_command.c @@ -1,5 +1,5 @@ /* - * $Id: http_command.c,v 1.9 2007-01-08 12:43:41 adam Exp $ + * $Id: http_command.c,v 1.10 2007-01-08 18:32:35 quinn Exp $ */ #include @@ -275,7 +275,7 @@ static void show_records(struct http_channel *c, int active) struct http_request *rq = c->request; struct http_response *rs = c->response; struct http_session *s = locate_session(rq, rs); - struct record **rl; + struct record_cluster **rl; NMEM nmem_show; char *start = http_argbyname(rq, "start"); char *num = http_argbyname(rq, "num"); @@ -312,10 +312,34 @@ static void show_records(struct http_channel *c, int active) { int ccount; struct record *p; + struct record_cluster *rec = rl[i]; + struct conf_service *service = global_parameters.server->service; + int imeta; wrbuf_puts(c->wrbuf, "\n"); - wrbuf_printf(c->wrbuf, "%s\n", rl[i]->title); - for (ccount = 1, p = rl[i]->next_cluster; p; p = p->next_cluster, ccount++) + for (imeta = 0; imeta < service->num_metadata; imeta++) + { + struct conf_metadata *cmd = &service->metadata[imeta]; + struct record_metadata *md; + if (!rec->metadata[imeta]) + continue; + if (!cmd->brief) + continue; + for (md = rec->metadata[imeta]; md; md = md->next) + { + wrbuf_printf(c->wrbuf, "", cmd->name); + switch (cmd->type) + { + case Metadata_type_generic: + wrbuf_puts(c->wrbuf, md->data.text); + break; + default: + wrbuf_puts(c->wrbuf, "[Can't represent]"); + } + wrbuf_printf(c->wrbuf, "", cmd->name); + } + } + for (ccount = 0, p = rl[i]->records; p; p = p->next, ccount++) ; if (ccount > 1) wrbuf_printf(c->wrbuf, "%d\n", ccount); diff --git a/src/pazpar2.c b/src/pazpar2.c index 18d0f0a..22ec100 100644 --- a/src/pazpar2.c +++ b/src/pazpar2.c @@ -1,4 +1,4 @@ -/* $Id: pazpar2.c,v 1.18 2007-01-08 12:43:41 adam Exp $ */; +/* $Id: pazpar2.c,v 1.19 2007-01-08 18:32:35 quinn Exp $ */; #include #include @@ -67,9 +67,11 @@ static char *client_states[] = { "Client_Stopped" }; +// Note: Some things in this structure will eventually move to configuration struct parameters global_parameters = { 0, + 0, 30, "81", "Index Data PazPar2 (MasterKey)", @@ -358,6 +360,8 @@ static void add_facet(struct session *s, const char *type, const char *value) termlist_insert(s->termlists[i].termlist, value); } +int yaz_marc_write_xml(); + static xmlDoc *normalize_record(struct client *cl, Z_External *rec) { struct conf_retrievalprofile *rprofile = cl->database->rprofile; @@ -424,9 +428,13 @@ static struct record *ingest_record(struct client *cl, Z_External *rec) { xmlDoc *xdoc = normalize_record(cl, rec); xmlNode *root, *n; - struct record *res, *head; + struct record *res; + struct record_cluster *cluster; struct session *se = cl->session; xmlChar *mergekey, *mergekey_norm; + xmlChar *type; + xmlChar *value; + struct conf_service *service = global_parameters.server->service; if (!xdoc) return 0; @@ -440,55 +448,109 @@ static struct record *ingest_record(struct client *cl, Z_External *rec) } res = nmem_malloc(se->nmem, sizeof(struct record)); - res->next_cluster = 0; + res->next = 0; res->target_offset = -1; res->term_frequency_vec = 0; - res->title = "Unknown"; + res->metadata = nmem_malloc(se->nmem, + sizeof(struct record_metadata*) * service->num_metadata); + bzero(res->metadata, sizeof(struct record_metadata*) * service->num_metadata); res->relevance = 0; mergekey_norm = nmem_strdup(se->nmem, (char*) mergekey); xmlFree(mergekey); - res->merge_key = normalize_mergekey(mergekey_norm); + normalize_mergekey(mergekey_norm); - head = reclist_insert(se->reclist, res); - if (!head) + cluster = reclist_insert(se->reclist, res, mergekey_norm); + if (!cluster) { /* no room for record */ xmlFreeDoc(xdoc); return 0; } - relevance_newrec(se->relevance, head); + relevance_newrec(se->relevance, cluster); + type = value = 0; for (n = root->children; n; n = n->next) { + if (type) + xmlFree(type); + if (value) + xmlFree(value); + type = value = 0; + if (n->type != XML_ELEMENT_NODE) continue; - if (!strcmp(n->name, "facet")) + if (!strcmp(n->name, "metadata")) { - xmlChar *type = xmlGetProp(n, "type"); - xmlChar *value = xmlNodeListGetString(xdoc, n->children, 0); - if (type && value) + type = xmlGetProp(n, "type"); + value = xmlNodeListGetString(xdoc, n->children, 0); + struct conf_metadata *md = 0; + struct record_metadata **wheretoput, *newm; + int imeta; + + // First, find out what field we're looking at + for (imeta = 0; imeta < service->num_metadata; imeta++) + if (!strcmp(type, service->metadata[imeta].name)) + { + md = &service->metadata[imeta]; + break; + } + if (!md) { - add_facet(se, type, value); - relevance_countwords(se->relevance, head, value, 1); + yaz_log(YLOG_WARN, "Ignoring unknown metadata element: %s", type); + continue; } - xmlFree(type); - xmlFree(value); - } - else if (!strcmp(n->name, "metadata")) - { - xmlChar *type = xmlGetProp(n, "type"); - if (type && !strcmp(type, "title")) + + // Find out where we are putting it + if (md->merge == Metadata_merge_no) + wheretoput = &res->metadata[imeta]; + else + wheretoput = &cluster->metadata[imeta]; + + // Put it there + newm = nmem_malloc(se->nmem, sizeof(struct record_metadata)); + newm->next = 0; + if (md->type == Metadata_type_generic) + { + newm->data.text = nmem_strdup(se->nmem, value); + } + else { - xmlChar *value = xmlNodeListGetString(xdoc, n->children, 0); - if (value) + yaz_log(YLOG_WARN, "Unknown type in metadata element %s", type); + continue; + } + if (md->merge == Metadata_merge_unique) + { + struct record_metadata *mnode; + for (mnode = *wheretoput; mnode; mnode = mnode->next) + if (!strcmp(mnode->data.text, mnode->data.text)) + break; + if (!mnode) { - res->title = nmem_strdup(se->nmem, value); - relevance_countwords(se->relevance, head, value, 4); - xmlFree(value); + newm->next = *wheretoput; + *wheretoput = newm; } } + else if (md->merge == Metadata_merge_longest) + { + if (!*wheretoput || + strlen(newm->data.text) > strlen((*wheretoput)->data.text)) + *wheretoput = newm; + } + else if (md->merge == Metadata_merge_all || md->merge == Metadata_merge_no) + { + newm->next = *wheretoput; + *wheretoput = newm; + } + else + yaz_log(YLOG_WARN, "Don't know how to merge on element name %s", md->name); + + relevance_countwords(se->relevance, cluster, value, 4); + if (md->termlist) + add_facet(se, type, value); xmlFree(type); + xmlFree(value); + type = value = 0; } else yaz_log(YLOG_WARN, "Unexpected element %s in internal record", n->name); @@ -496,7 +558,7 @@ static struct record *ingest_record(struct client *cl, Z_External *rec) xmlFreeDoc(xdoc); - relevance_donerecord(se->relevance, head); + relevance_donerecord(se->relevance, cluster); se->total_records++; return res; @@ -1184,11 +1246,11 @@ void report_nmem_stats(void) } #endif -struct record **show(struct session *s, int start, int *num, int *total, +struct record_cluster **show(struct session *s, int start, int *num, int *total, int *sumhits, NMEM nmem_show) { - struct record **recs = nmem_malloc(nmem_show, *num - * sizeof(struct record *)); + struct record_cluster **recs = nmem_malloc(nmem_show, *num + * sizeof(struct record_cluster *)); int i; #if USE_TIMING yaz_timing_t t = yaz_timing_create(); @@ -1208,7 +1270,7 @@ struct record **show(struct session *s, int start, int *num, int *total, for (i = 0; i < *num; i++) { - struct record *r = reclist_read_record(s->reclist); + struct record_cluster *r = reclist_read_record(s->reclist); if (!r) { *num = i; @@ -1311,6 +1373,13 @@ int main(int argc, char **argv) } } + if (!config) + { + yaz_log(YLOG_FATAL, "Load config with -f"); + exit(1); + } + global_parameters.server = config->servers; + if (!setport) { fprintf(stderr, "Set command port with -h\n"); diff --git a/src/pazpar2.h b/src/pazpar2.h index 4ee4c0f..4d8a16d 100644 --- a/src/pazpar2.h +++ b/src/pazpar2.h @@ -20,28 +20,33 @@ struct record; struct client; -struct record_metadata -{ - union - { +struct record_metadata { + union { char *text; struct { - int first; - int last; - } year_range; - int year; - } interpretation; + int year1; + int year2; + } year; + } data; + struct record_metadata *next; // next item of this name }; struct record { struct client *client; - char *title; int target_offset; + struct record_metadata **metadata; // Array mirrors list of metadata fields in config + int relevance; + int *term_frequency_vec; + struct record *next; +}; + +struct record_cluster +{ + struct record_metadata **metadata; // Array mirrors list of metadata fields in config char *merge_key; - struct record_metadata *md; int relevance; int *term_frequency_vec; - struct record *next_cluster; + struct record *records; }; struct connection; @@ -165,6 +170,7 @@ struct hitsbytarget { }; struct parameters { + struct conf_server *server; int dump_records; int timeout; /* operations timeout, in seconds */ char implementationId[128]; @@ -187,7 +193,7 @@ void destroy_session(struct session *s); int load_targets(struct session *s, const char *fn); void statistics(struct session *s, struct statistics *stat); char *search(struct session *s, char *query); -struct record **show(struct session *s, int start, int *num, int *total, +struct record_cluster **show(struct session *s, int start, int *num, int *total, int *sumhits, NMEM nmem_show); struct termlist_score **termlist(struct session *s, const char *name, int *num); void session_set_watch(struct session *s, int what, session_watchfun fun, void *data); diff --git a/src/reclists.c b/src/reclists.c index b9ac9f1..1f0350b 100644 --- a/src/reclists.c +++ b/src/reclists.c @@ -1,5 +1,5 @@ /* - * $Id: reclists.c,v 1.3 2007-01-08 12:43:41 adam Exp $ + * $Id: reclists.c,v 1.4 2007-01-08 18:32:35 quinn Exp $ */ #include @@ -13,13 +13,15 @@ #include "pazpar2.h" #include "reclists.h" +extern struct parameters global_parameters; + struct reclist_bucket { - struct record *record; + struct record_cluster *record; struct reclist_bucket *next; }; -struct record *reclist_read_record(struct reclist *l) +struct record_cluster *reclist_read_record(struct reclist *l) { if (l->pointer < l->num_records) return l->flatlist[l->pointer++]; @@ -65,47 +67,58 @@ struct reclist *reclist_create(NMEM nmem, int numrecs) res->hashmask = hashsize - 1; // Creates a bitmask res->num_records = 0; - res->flatlist = nmem_malloc(nmem, numrecs * sizeof(struct record*)); + res->flatlist = nmem_malloc(nmem, numrecs * sizeof(struct record_cluster*)); res->flatlist_size = numrecs; return res; } -struct record *reclist_insert(struct reclist *l, struct record *record) +// Insert a record. Return record cluster (newly formed or pre-existing) +struct record_cluster *reclist_insert(struct reclist *l, struct record *record, + char *merge_key) { unsigned int bucket; struct reclist_bucket **p; - struct record *head = 0; + struct record_cluster *cluster = 0; + struct conf_service *service = global_parameters.server->service; - bucket = hash((unsigned char*) record->merge_key) & l->hashmask; + bucket = hash((unsigned char*) merge_key) & l->hashmask; for (p = &l->hashtable[bucket]; *p; p = &(*p)->next) { // We found a matching record. Merge them - if (!strcmp(record->merge_key, (*p)->record->merge_key)) + if (!strcmp(merge_key, (*p)->record->merge_key)) { - struct record *existing = (*p)->record; - record->next_cluster = existing->next_cluster; - existing->next_cluster = record; - head = existing; + struct record_cluster *existing = (*p)->record; + record->next = existing->records; + existing->records = record; + cluster = existing; break; } } - if (!head && l->num_records < l->flatlist_size) + if (!cluster && l->num_records < l->flatlist_size) { struct reclist_bucket *new = nmem_malloc(l->nmem, sizeof(struct reclist_bucket)); + struct record_cluster *newc = + nmem_malloc(l->nmem, sizeof(struct record_cluster)); - assert(!*p); - - new->record = record; - record->next_cluster = 0; + record->next = 0; + new->record = newc; new->next = 0; + newc->records = record; + newc->merge_key = merge_key; + newc->relevance = 0; + newc->term_frequency_vec = 0; + newc->metadata = 0; + newc->metadata = nmem_malloc(l->nmem, + sizeof(struct record_metadata*) * service->num_metadata); + bzero(newc->metadata, sizeof(struct record_metadata*) * service->num_metadata); + *p = new; - assert(l->num_records < l->flatlist_size); - l->flatlist[l->num_records++] = record; - head = record; + l->flatlist[l->num_records++] = newc; + cluster = newc; } - return head; + return cluster; } diff --git a/src/reclists.h b/src/reclists.h index f9d38c3..cca3e42 100644 --- a/src/reclists.h +++ b/src/reclists.h @@ -7,7 +7,7 @@ struct reclist int hashtable_size; int hashmask; - struct record **flatlist; + struct record_cluster **flatlist; int flatlist_size; int num_records; int pointer; @@ -16,8 +16,9 @@ struct reclist }; struct reclist *reclist_create(NMEM, int numrecs); -struct record * reclist_insert(struct reclist *tl, struct record *record); -struct record *reclist_read_record(struct reclist *l); +struct record_cluster *reclist_insert(struct reclist *tl, struct record *record, + char *merg_key); +struct record_cluster *reclist_read_record(struct reclist *l); void reclist_rewind(struct reclist *l); #endif diff --git a/src/relevance.c b/src/relevance.c index 75800a2..c7c3f12 100644 --- a/src/relevance.c +++ b/src/relevance.c @@ -1,5 +1,5 @@ /* - * $Id: relevance.c,v 1.4 2007-01-08 12:43:41 adam Exp $ + * $Id: relevance.c,v 1.5 2007-01-08 18:32:35 quinn Exp $ */ #include @@ -127,7 +127,7 @@ struct relevance *relevance_create(NMEM nmem, const char **terms, int numrecs) return res; } -void relevance_newrec(struct relevance *r, struct record *rec) +void relevance_newrec(struct relevance *r, struct record_cluster *rec) { if (!rec->term_frequency_vec) { @@ -139,7 +139,7 @@ void relevance_newrec(struct relevance *r, struct record *rec) // FIXME. The definition of a word is crude here.. should support // some form of localization mechanism? -void relevance_countwords(struct relevance *r, struct record *head, +void relevance_countwords(struct relevance *r, struct record_cluster *cluster, const char *words, int multiplier) { while (*words) @@ -155,23 +155,23 @@ void relevance_countwords(struct relevance *r, struct record *head, if ((res = word_trie_match(r->wt, words, &skipped))) { words += skipped; - head->term_frequency_vec[res] += multiplier; + cluster->term_frequency_vec[res] += multiplier; } else { while (*words && (c = raw_char(tolower(*words))) >= 0) words++; } - head->term_frequency_vec[0]++; + cluster->term_frequency_vec[0]++; } } -void relevance_donerecord(struct relevance *r, struct record *head) +void relevance_donerecord(struct relevance *r, struct record_cluster *cluster) { int i; for (i = 1; i < r->vec_len; i++) - if (head->term_frequency_vec[i] > 0) + if (cluster->term_frequency_vec[i] > 0) r->doc_frequency_vec[i]++; r->doc_frequency_vec[0]++; @@ -218,7 +218,7 @@ void relevance_prepare_read(struct relevance *rel, struct reclist *reclist) for (i = 0; i < reclist->num_records; i++) { int t; - struct record *rec = reclist->flatlist[i]; + struct record_cluster *rec = reclist->flatlist[i]; float relevance; relevance = 0; for (t = 1; t < rel->vec_len; t++) diff --git a/src/relevance.h b/src/relevance.h index 1639cf3..11350b3 100644 --- a/src/relevance.h +++ b/src/relevance.h @@ -9,10 +9,10 @@ struct relevance; struct relevance *relevance_create(NMEM nmem, const char **terms, int numrecs); -void relevance_newrec(struct relevance *r, struct record *rec); -void relevance_countwords(struct relevance *r, struct record *rec, +void relevance_newrec(struct relevance *r, struct record_cluster *cluster); +void relevance_countwords(struct relevance *r, struct record_cluster *cluster, const char *words, int multiplier); -void relevance_donerecord(struct relevance *r, struct record *rec); +void relevance_donerecord(struct relevance *r, struct record_cluster *cluster); void relevance_prepare_read(struct relevance *rel, struct reclist *rec); diff --git a/src/termlists.c b/src/termlists.c index e586169..6c85495 100644 --- a/src/termlists.c +++ b/src/termlists.c @@ -1,5 +1,5 @@ /* - * $Id: termlists.c,v 1.2 2007-01-08 12:43:41 adam Exp $ + * $Id: termlists.c,v 1.3 2007-01-08 18:32:35 quinn Exp $ */ #include @@ -88,7 +88,7 @@ static void update_highscore(struct termlist *tl, struct termlist_score *t) int smallest; int me = -1; - if (t->frequency < tl->highscore_min) + if (tl->highscore_num > tl->highscore_size && t->frequency < tl->highscore_min) return; smallest = 0; @@ -101,6 +101,8 @@ static void update_highscore(struct termlist *tl, struct termlist_score *t) } if (tl->highscore_num) tl->highscore_min = tl->highscore[smallest]->frequency; + if (t->frequency < tl->highscore_min) + tl->highscore_min = t->frequency; if (me >= 0) return; if (tl->highscore_num < tl->highscore_size) diff --git a/www/test1/search.js b/www/test1/search.js index d5df1a6..5d25b70 100644 --- a/www/test1/search.js +++ b/www/test1/search.js @@ -1,4 +1,4 @@ -/* $Id: search.js,v 1.8 2007-01-05 02:12:51 quinn Exp $ +/* $Id: search.js,v 1.9 2007-01-08 18:32:35 quinn Exp $ * --------------------------------------------------- * Javascript container */ @@ -185,7 +185,7 @@ function show_records() { body.innerHTML += '

'; body.innerHTML += (i + start + 1) + ': '; - var mk = hits[i].getElementsByTagName("title"); + var mk = hits[i].getElementsByTagName("md-title"); if (mk[0]) body.innerHTML += mk[0].childNodes[0].nodeValue; body.innerHTML += '

'; -- 1.7.10.4