CCL field ranking boost.
author: Adam Dickmeiss <adam@indexdata.dk>
Mon, 11 Jun 2012 17:54:18 +0000 (19:54 +0200)
committer: Adam Dickmeiss <adam@indexdata.dk>
Mon, 11 Jun 2012 17:54:18 +0000 (19:54 +0200)
The new metadata attribute frank="cclfield value" specifies a boost
factor that is used instead of the rank value for search terms that
are part of the given CCL field. For example,
<metadata .. rank="2" frank="au 5"/> specifies rank weight
5 for au CCL field terms (author) and 2 for everything else.

src/pazpar2_config.c
src/pazpar2_config.h
src/relevance.c
src/relevance.h
src/session.c
test/test_http.cfg
test/test_http.urls
test/test_http_77.res [new file with mode: 0644]
test/test_http_78.res [new file with mode: 0644]
test/test_http_79.res [new file with mode: 0644]
test/z3950_indexdata_com_marc.xml

index bff5810..d992a33 100644 (file)
@@ -76,7 +76,8 @@ static void conf_metadata_assign(NMEM nmem,
                                  int sortkey_offset,
                                  enum conf_metadata_mergekey mt,
                                  const char *facetrule,
-                                 const char *limitmap)
+                                 const char *limitmap,
+                                 const char *frank)
 {
     assert(nmem && metadata && name);
     
@@ -98,6 +99,7 @@ static void conf_metadata_assign(NMEM nmem,
     metadata->mergekey = mt;
     metadata->facetrule = nmem_strdup_null(nmem, facetrule);
     metadata->limitmap = nmem_strdup_null(nmem, limitmap);
+    metadata->frank = nmem_strdup_null(nmem, frank);
 }
 
 
@@ -177,7 +179,8 @@ static struct conf_metadata* conf_service_add_metadata(
     int sortkey_offset,
     enum conf_metadata_mergekey mt,
     const char *facetrule,
-    const char *limitmap
+    const char *limitmap,
+    const char *frank
     )
 {
     struct conf_metadata * md = 0;
@@ -189,7 +192,7 @@ static struct conf_metadata* conf_service_add_metadata(
     md = service->metadata + field_id;
     conf_metadata_assign(service->nmem, md, name, type, merge, setting,
                          brief, termlist, rank, sortkey_offset,
-                         mt, facetrule, limitmap);
+                         mt, facetrule, limitmap, frank);
     return md;
 }
 
@@ -294,6 +297,8 @@ static int parse_metadata(struct conf_service *service, xmlNode *n,
     xmlChar *xml_mergekey = 0;
     xmlChar *xml_limitmap = 0;
     xmlChar *xml_icu_chain = 0;
+    xmlChar *xml_frank = 0;
+
     struct _xmlAttr *attr;
     for (attr = n->properties; attr; attr = attr->next)
     {
@@ -330,6 +335,9 @@ static int parse_metadata(struct conf_service *service, xmlNode *n,
         else if (!xmlStrcmp(attr->name, BAD_CAST "limitmap") &&
                  attr->children && attr->children->type == XML_TEXT_NODE)
             xml_limitmap = attr->children->content;
+        else if (!xmlStrcmp(attr->name, BAD_CAST "frank") &&
+                 attr->children && attr->children->type == XML_TEXT_NODE)
+            xml_frank = attr->children->content;
         else
         {
             yaz_log(YLOG_FATAL, "Unknown metadata attribute '%s'", attr->name);
@@ -470,7 +478,10 @@ static int parse_metadata(struct conf_service *service, xmlNode *n,
                               (const char *) xml_name,
                               type, merge, setting,
                               brief, termlist, rank, sortkey_offset,
-                              mergekey_type, (const char *) xml_icu_chain, (const char *) xml_limitmap);
+                              mergekey_type,
+                              (const char *) xml_icu_chain,
+                              (const char *) xml_limitmap,
+                              (const char *) xml_frank);
     (*md_node)++;
     return 0;
 }
index 692d260..18dfde6 100644 (file)
@@ -85,7 +85,7 @@ struct conf_metadata
     char *facetrule;
 
     char *limitmap;  // Should be expanded into service-wide default e.g. pz:limitmap:<name>=value setting
-    char *facetmap;  // Should be expanded into service-wide default e.g. pz:facetmap:<name>=value setting
+    char *frank;
 };
 
 
index 418dc9a..7f1943a 100644 (file)
@@ -44,34 +44,45 @@ struct word_entry {
     struct word_entry *next;
 };
 
-int word_entry_match(struct word_entry *entries, const char *norm_str)
+static int word_entry_match(struct word_entry *entries, const char *norm_str,
+                            const char *frank, int *local_mult)
 {
     for (; entries; entries = entries->next)
     {
         if (!strcmp(norm_str, entries->norm_str))
+        {
+            const char *cp = 0;
+            if (frank && (cp = strchr(frank, ' ')))
+            {
+                if ((cp - frank) == strlen(entries->ccl_field) &&
+                    memcmp(entries->ccl_field, frank, cp - frank) == 0)
+                    *local_mult = atoi(cp + 1);
+            }
             return entries->termno;
+        }
     }
     return 0;
 }
 
 void relevance_countwords(struct relevance *r, struct record_cluster *cluster,
-                          const char *words, int multiplier, const char *name)
+                          const char *words, int multiplier, const char *name,
+                          const char *frank)
 {
     int *mult = cluster->term_frequency_vec_tmp;
     const char *norm_str;
     int i, length = 0;
-
     pp2_charset_token_first(r->prt, words, 0);
     for (i = 1; i < r->vec_len; i++)
         mult[i] = 0;
 
     while ((norm_str = pp2_charset_token_next(r->prt)))
     {
-        int res = word_entry_match(r->entries, norm_str);
+        int local_mult = multiplier;
+        int res = word_entry_match(r->entries, norm_str, frank, &local_mult);
         if (res)
         {
             assert(res < r->vec_len);
-            mult[res] += multiplier;
+            mult[res] += local_mult;
         }
         length++;
     }
index f20cbec..b22a7a0 100644 (file)
@@ -33,7 +33,8 @@ struct relevance *relevance_create_ccl(pp2_charset_fact_t pft,
 void relevance_destroy(struct relevance **rp);
 void relevance_newrec(struct relevance *r, struct record_cluster *cluster);
 void relevance_countwords(struct relevance *r, struct record_cluster *cluster,
-                          const char *words, int multiplier, const char *name);
+                          const char *words, int multiplier, const char *name,
+                          const char *frank);
 void relevance_donerecord(struct relevance *r, struct record_cluster *cluster);
 
 void relevance_prepare_read(struct relevance *rel, struct reclist *rec);
index 105c064..4049360 100644 (file)
@@ -1933,12 +1933,12 @@ static int ingest_to_cluster(struct client *cl,
                 }
             }
 
-
             // ranking of _all_ fields enabled ... 
             if (rank)
             {
                 relevance_countwords(se->relevance, cluster, 
-                                     (char *) value, rank, ser_md->name);
+                                     (char *) value, rank, ser_md->name,
+                                     ser_md->frank);
             }
 
             // construct facets ... unless the client already has reported them
index d084f9e..8904f3b 100644 (file)
@@ -13,7 +13,8 @@
       <metadata name="isbn"/>
       <metadata name="date" brief="yes" sortkey="numeric" type="year" merge="range"
                termlist="yes"/>
-      <metadata name="author" brief="yes" termlist="yes" merge="longest" rank="2" mergekey="optional" />
+      <metadata name="author" brief="yes" termlist="yes" merge="longest"
+            frank="au 3" rank="2" mergekey="optional" />
       <metadata name="subject" brief="yes" merge="unique" termlist="yes" rank="3"/>
       <metadata name="id"/>
       <metadata name="lccn" merge="unique"/>
index 5da644d..47247d2 100644 (file)
@@ -74,3 +74,6 @@ http://localhost:9763/search.pz2?session=9&command=search&query=computer&limit=M
 http://localhost:9763/search.pz2?session=9&command=show&block=1
 http://localhost:9763/search.pz2?session=9&command=search&query=computer&limit=date%3D1977
 http://localhost:9763/search.pz2?session=9&command=show&block=1
+http://localhost:9763/search.pz2?command=init
+http://localhost:9763/search.pz2?session=10&command=search&query=au%3dadam
+http://localhost:9763/search.pz2?session=10&command=show&block=1
diff --git a/test/test_http_77.res b/test/test_http_77.res
new file mode 100644 (file)
index 0000000..c41b6b8
--- /dev/null
@@ -0,0 +1,3 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<init><status>OK</status><session>10</session><protocol>1</protocol><keepAlive>50000</keepAlive>
+</init>
\ No newline at end of file
diff --git a/test/test_http_78.res b/test/test_http_78.res
new file mode 100644 (file)
index 0000000..ab63fe6
--- /dev/null
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<search><status>OK</status></search>
\ No newline at end of file
diff --git a/test/test_http_79.res b/test/test_http_79.res
new file mode 100644 (file)
index 0000000..c4aa933
--- /dev/null
@@ -0,0 +1,52 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<show><status>OK</status>
+<activeclients>0</activeclients>
+<merged>2</merged>
+<total>2</total>
+<start>0</start>
+<num>2</num>
+<hit>
+
+<md-title>The religious teachers of Greece</md-title>
+<md-date>1972</md-date>
+<md-author>Adam, James</md-author>
+<md-subject>Greek literature</md-subject>
+<md-subject>Philosophy, Ancient</md-subject>
+<md-subject>Greece</md-subject>
+<md-description>Reprint of the 1909 ed., which was issued as the 1904-1906 Gifford lectures</md-description><location id="z3950.indexdata.com/marc" name="Index Data MARC test server">
+<md-title>The religious teachers of Greece</md-title>
+<md-date>1972</md-date>
+<md-author>Adam, James</md-author>
+<md-subject>Greek literature</md-subject>
+<md-subject>Philosophy, Ancient</md-subject>
+<md-subject>Greece</md-subject>
+<md-description tag="500">Reprint of the 1909 ed., which was issued as the 1904-1906 Gifford lectures</md-description>
+<md-description tag="504">Includes bibliographical references</md-description>
+<md-test-usersetting>XXXXXXXXXX</md-test-usersetting>
+<md-test-usersetting-2>test-usersetting-2 data: 
+        YYYYYYYYY</md-test-usersetting-2></location>
+<relevance>150000</relevance>
+<recid>content: title the religious teachers of greece author adam james medium book</recid>
+</hit>
+<hit>
+
+<md-title>Four psalms</md-title>
+<md-title-remainder>XXIII, XXXVI, LII, CXXI</md-title-remainder>
+<md-date>1980</md-date>
+<md-author>Smith, George Adam</md-author>
+<md-subject>Bible</md-subject><location id="z3950.indexdata.com/marc" name="Index Data MARC test server">
+<md-title>Four psalms</md-title>
+<md-title-remainder>XXIII, XXXVI, LII, CXXI</md-title-remainder>
+<md-date>1980</md-date>
+<md-author>Smith, George Adam</md-author>
+<md-subject>Bible</md-subject>
+<md-subject>Bible</md-subject>
+<md-subject>Bible</md-subject>
+<md-subject>Bible</md-subject>
+<md-test-usersetting>XXXXXXXXXX</md-test-usersetting>
+<md-test-usersetting-2>test-usersetting-2 data: 
+        YYYYYYYYY</md-test-usersetting-2></location>
+<relevance>100000</relevance>
+<recid>content: title four psalms author smith george adam medium book</recid>
+</hit>
+</show>
\ No newline at end of file
index 8fe0a37..2a41e4b 100644 (file)
@@ -6,7 +6,7 @@
 
   <!-- field-specific mappings -->
   <set name="pz:cclmap:ti" value="u=4 s=al"/>
-  <set name="pz:cclmap:su" value="u=21 s=al"/>
+  <set name="pz:cclmap:au" value="u=1003 s=al"/>
   <set name="pz:cclmap:isbn" value="u=7"/>
   <set name="pz:cclmap:issn" value="u=8"/>
   <set name="pz:cclmap:date" value="u=30 r=r"/>