Fixed bug 854. Using nmem_strsplit() to split phrase terms on word boundaries.
[pazpar2-moved-to-github.git] / src / logic.c
index bfdaec1..c581f2d 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: logic.c,v 1.3 2007-04-16 21:54:43 adam Exp $
+/* $Id: logic.c,v 1.9 2007-04-18 19:45:09 quinn Exp $
    Copyright (c) 2006-2007, Index Data.
 
 This file is part of Pazpar2.
@@ -103,7 +103,7 @@ struct parameters global_parameters =
     0,
     30,
     "81",
-    "Index Data PazPar2 (MasterKey)",
+    "Index Data PazPar2",
     VERSION,
     600, // 10 minutes
     60,
@@ -206,8 +206,13 @@ static void send_init(IOCHAN i)
     odr_reset(global_parameters.odr_out);
 }
 
+// Recursively traverse query structure to extract terms.
 static void pull_terms(NMEM nmem, struct ccl_rpn_node *n, char **termlist, int *num)
 {
+    char **words;
+    int numwords;
+    int i;
+
     switch (n->kind)
     {
         case CCL_RPN_AND:
@@ -218,7 +223,9 @@ static void pull_terms(NMEM nmem, struct ccl_rpn_node *n, char **termlist, int *
             pull_terms(nmem, n->u.p[1], termlist, num);
             break;
         case CCL_RPN_TERM:
-            termlist[(*num)++] = nmem_strdup(nmem, n->u.t.term);
+            nmem_strsplit(nmem, " ", n->u.t.term, &words, &numwords);
+            for (i = 0; i < numwords; i++)
+                termlist[(*num)++] = words[i];
             break;
         default: // NOOP
             break;
@@ -241,10 +248,9 @@ static void send_search(IOCHAN i)
     struct session *se = cl->session;
     struct session_database *sdb = cl->database;
     Z_APDU *a = zget_APDU(global_parameters.odr_out, Z_APDU_searchRequest);
-    int ndb, cerror, cpos;
+    int ndb;
     char **databaselist;
     Z_Query *zquery;
-    struct ccl_rpn_node *cn;
     int ssub = 0, lslb = 100000, mspn = 10;
     char *recsyn = 0;
     char *piggyback = 0;
@@ -253,25 +259,11 @@ static void send_search(IOCHAN i)
 
     yaz_log(YLOG_DEBUG, "Sending search to %s", cl->database->database->url);
 
-    cn = ccl_find_str(sdb->database->ccl_map, se->query, &cerror, &cpos);
-    if (!cn)
-        return;
-
-    if (!se->relevance)
-    {
-        // Initialize relevance structure with query terms
-        char *p[512];
-        extract_terms(se->nmem, cn, p);
-        se->relevance = relevance_create(se->nmem, (const char **) p,
-                se->expected_maxrecs);
-    }
-
     // constructing RPN query
     a->u.searchRequest->query = zquery = odr_malloc(global_parameters.odr_out,
             sizeof(Z_Query));
     zquery->which = Z_Query_type_1;
-    zquery->u.type_1 = ccl_rpn_query(global_parameters.odr_out, cn);
-    ccl_rpn_delete(cn);
+    zquery->u.type_1 = p_query_rpn(global_parameters.odr_out, cl->pquery);
 
     // converting to target encoding
     if ((queryenc = session_setting_oneval(sdb, PZ_QUERYENCODING))){
@@ -699,7 +691,9 @@ static struct record *ingest_record(struct client *cl, Z_External *rec)
     xmlFree(mergekey);
     normalize_mergekey((char *) mergekey_norm, 0);
 
-    cluster = reclist_insert(global_parameters.server->service, se->reclist, res, (char *) mergekey_norm, 
+    cluster = reclist_insert(se->reclist, 
+                             global_parameters.server->service, 
+                             res, (char *) mergekey_norm, 
                              &se->total_merged);
     if (global_parameters.dump_records)
         yaz_log(YLOG_LOG, "Cluster id %d from %s (#%d)", cluster->recid,
@@ -1257,6 +1251,41 @@ static int client_prep_connection(struct client *cl)
         return 0;
 }
 
+// Parse the query given the settings specific to this client
+static int client_parse_query(struct client *cl)
+{
+    struct session *se = cl->session;
+    struct ccl_rpn_node *cn;
+    int cerror, cpos;
+
+    cn = ccl_find_str(cl->database->database->ccl_map, se->query, &cerror, &cpos);
+    if (!cn)
+    {
+        cl->state = Client_Error;
+        yaz_log(YLOG_WARN, "Failed to parse query for %s",
+                         cl->database->database->url);
+        return -1;
+    }
+    wrbuf_rewind(se->wrbuf);
+    ccl_pquery(se->wrbuf, cn);
+    wrbuf_putc(se->wrbuf, '\0');
+    if (cl->pquery)
+        xfree(cl->pquery);
+    cl->pquery = xstrdup(wrbuf_buf(se->wrbuf));
+
+    if (!se->relevance)
+    {
+        // Initialize relevance structure with query terms
+        char *p[512];
+        extract_terms(se->nmem, cn, p);
+        se->relevance = relevance_create(se->nmem, (const char **) p,
+                se->expected_maxrecs);
+    }
+
+    ccl_rpn_delete(cn);
+    return 0;
+}
+
 static struct client *client_create(void)
 {
     struct client *r;
@@ -1267,6 +1296,7 @@ static struct client *client_create(void)
     }
     else
         r = xmalloc(sizeof(struct client));
+    r->pquery = 0;
     r->database = 0;
     r->connection = 0;
     r->session = 0;
@@ -1402,25 +1432,26 @@ char *search(struct session *se, char *query, char *filter)
     criteria = parse_filter(se->nmem, filter);
     strcpy(se->query, query);
     se->requestid++;
-    select_targets(se, criteria);
-    for (cl = se->clients; cl; cl = cl->next)
-    {
-        if (client_prep_connection(cl))
-            live_channels++;
-    }
+    live_channels = select_targets(se, criteria);
     if (live_channels)
     {
         int maxrecs = live_channels * global_parameters.toget;
         se->num_termlists = 0;
         se->reclist = reclist_create(se->nmem, maxrecs);
         // This will be initialized in send_search()
-        se->relevance = 0;
         se->total_records = se->total_hits = se->total_merged = 0;
         se->expected_maxrecs = maxrecs;
     }
     else
         return "NOTARGETS";
 
+    se->relevance = 0;
+    for (cl = se->clients; cl; cl = cl->next)
+        if (client_parse_query(cl) < 0)  // Query must parse for all targets
+            return "QUERY";
+    for (cl = se->clients; cl; cl = cl->next)
+        client_prep_connection(cl);
+
     return 0;
 }