1 /* This file is part of Pazpar2.
2 Copyright (C) Index Data
4 Pazpar2 is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 \brief high-level logic; mostly user sessions and settings
46 #include <yaz/marcdisp.h>
47 #include <yaz/comstack.h>
48 #include <yaz/tcpip.h>
49 #include <yaz/proto.h>
50 #include <yaz/readconf.h>
51 #include <yaz/pquery.h>
52 #include <yaz/otherinfo.h>
53 #include <yaz/yaz-util.h>
55 #include <yaz/query-charset.h>
56 #include <yaz/querytowrbuf.h>
57 #include <yaz/oid_db.h>
58 #include <yaz/snprintf.h>
62 #include <yaz/timing.h>
66 #include "parameters.h"
70 #include "termlists.h"
72 #include "relevance.h"
76 #include "normalize7bit.h"
78 #include <libxml/tree.h>
// Function-like macro yielding the larger of a and b. Each argument may be
// evaluated twice, so do not pass expressions with side effects.
82 #define MAX(a,b) ((a)>(b)?(a):(b))
// Global parameter block of type struct parameters. Only a fragment of the
// initializer is visible in this view.
84 // Note: Some things in this structure will eventually move to configuration
85 struct parameters global_parameters =
89 0, // predictable sessions
// Fields of a singly linked list node (used in this file as
// struct client_list): one client plus the link to the next node.
93 struct client *client;
94 struct client_list *next;
97 /* session counting (1) , disable client counting (0) */
// g_session_mutex starts out as 0 and is created on first use by
// session_use(), which updates no_sessions while holding it.
98 static YAZ_MUTEX g_session_mutex = 0;
99 static int no_sessions = 0;
101 static int session_use(int delta)
104 if (!g_session_mutex)
105 yaz_mutex_create(&g_session_mutex);
106 yaz_mutex_enter(g_session_mutex);
107 no_sessions += delta;
108 sessions = no_sessions;
109 yaz_mutex_leave(g_session_mutex);
110 yaz_log(YLOG_DEBUG, "%s sessions=%d", delta == 0 ? "" :
111 (delta > 0 ? "INC" : "DEC"), no_sessions);
// Return the current session count; calls session_use() with a delta of 0
// so the counter itself is left unchanged.
115 int sessions_count(void)
117 return session_use(0);
// Dump an XML document to the YAZ log file.
// The document is serialized to memory (formatted output when
// LIBXML_VERSION >= 20600, plain otherwise) and the resulting bytes are
// written to the FILE returned by yaz_log_file().
120 static void log_xml_doc(xmlDoc *doc)
122 FILE *lf = yaz_log_file();
125 #if LIBXML_VERSION >= 20600
126 xmlDocDumpFormatMemory(doc, &result, &len, 1);
128 xmlDocDumpMemory(doc, &result, &len);
// the fwrite result is deliberately discarded
132 (void) fwrite(result, 1, len, lf);
// Enter the session mutex on behalf of 'caller'.
// A debug line naming the caller is logged first, then s->session_mutex is
// entered.
138 static void session_enter(struct session *s, const char *caller)
141 session_log(s, YLOG_DEBUG, "Session lock by %s", caller);
142 yaz_mutex_enter(s->session_mutex);
// Leave the session mutex on behalf of 'caller'.
// s->session_mutex is left first, then a debug line naming the caller is
// logged.
145 static void session_leave(struct session *s, const char *caller)
147 yaz_mutex_leave(s->session_mutex);
149 session_log(s, YLOG_DEBUG, "Session unlock by %s", caller);
// Run 'value' through the ICU chain named icu_chain_id.
// A tokenizer is created from the service charsets; an unknown chain id is
// reported with a YLOG_FATAL session log message. Every non-empty
// normalized token is appended to norm_wr and every available display form
// to disp_wr, each list separated by single spaces. The tokenizer is
// destroyed at the end. The returned status values are on lines not visible
// in this view.
152 static int run_icu(struct session *s, const char *icu_chain_id,
154 WRBUF norm_wr, WRBUF disp_wr)
156 const char *facet_component;
157 struct conf_service *service = s->service;
158 pp2_charset_token_t prt =
159 pp2_charset_token_create(service->charsets, icu_chain_id);
162 session_log(s, YLOG_FATAL,
163 "Unknown ICU chain '%s'", icu_chain_id);
166 pp2_charset_token_first(prt, value, 0);
167 while ((facet_component = pp2_charset_token_next(prt)))
169 const char *display_component;
// empty tokens are not added to the normalized form
170 if (*facet_component)
172 if (wrbuf_len(norm_wr))
173 wrbuf_puts(norm_wr, " ");
174 wrbuf_puts(norm_wr, facet_component);
176 display_component = pp2_get_display(prt);
177 if (display_component)
179 if (wrbuf_len(disp_wr))
180 wrbuf_puts(disp_wr, " ");
181 wrbuf_puts(disp_wr, display_component);
184 pp2_charset_token_destroy(prt);
// Normalize a facet value for the metadata field 'type'.
// The ICU chain id is taken from the facetrule of the service metadata
// entry whose name equals 'type'. "facet" is assigned as chain id under a
// condition that is not visible here (presumably when no rule was found).
// The normalized form is written to facet_wrbuf and the display form to
// display_wrbuf.
188 static void session_normalize_facet(struct session *s,
189 const char *type, const char *value,
190 WRBUF display_wrbuf, WRBUF facet_wrbuf)
192 struct conf_service *service = s->service;
194 const char *icu_chain_id = 0;
196 for (i = 0; i < service->num_metadata; i++)
197 if (!strcmp((service->metadata + i)->name, type))
198 icu_chain_id = (service->metadata + i)->facetrule;
200 icu_chain_id = "facet";
// note the argument order: run_icu() takes the normalized buffer first
202 run_icu(s, icu_chain_id, value, facet_wrbuf, display_wrbuf);
// Record one facet value with its count in the session term list for 'type'.
// The value is normalized first; only a non-empty normalized form is
// inserted. The term list is looked up by name in s->termlists, and a new
// one is allocated on s->nmem (the lines deciding when are not visible in
// this view). Both temporary WRBUFs are destroyed before returning.
205 void add_facet(struct session *s, const char *type, const char *value, int count)
207 WRBUF facet_wrbuf = wrbuf_alloc();
208 WRBUF display_wrbuf = wrbuf_alloc();
210 session_normalize_facet(s, type, value, display_wrbuf, facet_wrbuf);
212 if (wrbuf_len(facet_wrbuf))
214 struct named_termlist **tp = &s->termlists;
215 for (; (*tp); tp = &(*tp)->next)
216 if (!strcmp((*tp)->name, type))
220 *tp = nmem_malloc(s->nmem, sizeof(**tp));
221 (*tp)->name = nmem_strdup(s->nmem, type);
222 (*tp)->termlist = termlist_create(s->nmem);
225 termlist_insert((*tp)->termlist, wrbuf_cstr(display_wrbuf),
226 wrbuf_cstr(facet_wrbuf), count);
228 wrbuf_destroy(facet_wrbuf);
229 wrbuf_destroy(display_wrbuf);
// Parse the record string 'rec' into an xmlDoc with xmlParseMemory().
// "Non-wellformed XML" is logged as a warning (presumably when parsing
// fails - the test is not visible here). When
// global_parameters.dump_records is set, a log line naming the database id
// announces the un-normalized record.
232 static xmlDoc *record_to_xml(struct session *se,
233 struct session_database *sdb, const char *rec)
235 struct database *db = sdb->database;
238 rdoc = xmlParseMemory(rec, strlen(rec));
242 session_log(se, YLOG_WARN, "Non-wellformed XML");
246 if (global_parameters.dump_records)
248 session_log(se, YLOG_LOG, "Un-normalized record from %s", db->id);
// Upper bound on the number of setting values passed on as stylesheet
// parameters; parameter arrays in this file are sized MAX_XSLT_ARGS*2+1.
255 #define MAX_XSLT_ARGS 16
257 // Add static values from session database settings if applicable
// For every service metadata entry flagged Metadata_setting_parameter that
// has a settings offset, the database's value is copied to buf + 1 of a
// buffer of strlen(val) + 3 bytes taken from nmem (presumably leaving room
// for surrounding quote characters - those lines are not visible here), and
// md->name followed by buf is appended to parms. Values are only added
// while nparms is below MAX_XSLT_ARGS.
258 static void insert_settings_parameters(struct session_database *sdb,
259 struct conf_service *service,
267 for (i = 0; i < service->num_metadata; i++)
269 struct conf_metadata *md = &service->metadata[i];
272 if (md->setting == Metadata_setting_parameter &&
273 (setting = settings_lookup_offset(service, md->name)) >= 0)
275 const char *val = session_setting_oneval(sdb, setting);
276 if (val && nparms < MAX_XSLT_ARGS)
279 int len = strlen(val);
280 buf = nmem_malloc(nmem, len + 3);
282 strcpy(buf + 1, val);
285 parms[offset++] = md->name;
286 parms[offset++] = buf;
294 // Add static values from session database settings if applicable
// For every service metadata entry flagged Metadata_setting_postproc that
// has a settings offset, a "metadata" text child is created under 'root'
// and its "type" attribute is set to the metadata name.
295 static void insert_settings_values(struct session_database *sdb, xmlDoc *doc,
297 struct conf_service *service)
301 for (i = 0; i < service->num_metadata; i++)
303 struct conf_metadata *md = &service->metadata[i];
306 if (md->setting == Metadata_setting_postproc &&
307 (offset = settings_lookup_offset(service, md->name)) >= 0)
309 const char *val = session_setting_oneval(sdb, offset);
312 xmlNode *n = xmlNewTextChild(root, 0, (xmlChar *) "metadata",
314 xmlSetProp(n, (xmlChar *) "type", (xmlChar *) md->name);
// Turn a raw record string into a normalized xmlDoc.
// The record is parsed with record_to_xml(), stylesheet parameters are
// collected from the database settings into a local array of
// MAX_XSLT_ARGS*2+1 pointers, and the document is passed to
// normalize_record_transform() together with sdb->map. A non-zero result
// from the transform is logged as "Normalize failed".
320 static xmlDoc *normalize_record(struct session *se,
321 struct session_database *sdb,
322 struct conf_service *service,
323 const char *rec, NMEM nmem)
325 xmlDoc *rdoc = record_to_xml(se, sdb, rec);
329 char *parms[MAX_XSLT_ARGS*2+1];
331 insert_settings_parameters(sdb, service, parms, nmem);
333 if (normalize_record_transform(sdb->map, &rdoc, (const char **)parms))
335 session_log(se, YLOG_WARN, "Normalize failed");
// Write all settings of a session database to the WRBUF 'w'.
// Each entry of every settings slot (including chained entries) becomes one
// <set name="..." value="..."/> element; name and value are written with
// wrbuf_xmlputs().
341 void session_settings_dump(struct session *se,
342 struct session_database *db,
347 int i, num = db->num_settings;
348 for (i = 0; i < num; i++)
350 struct setting *s = db->settings[i];
351 for (;s ; s = s->next)
353 wrbuf_puts(w, "<set name=\"");
354 wrbuf_xmlputs(w, s->name);
355 wrbuf_puts(w, "\" value=\"");
356 wrbuf_xmlputs(w, s->value);
357 wrbuf_puts(w, "\"/>");
365 // Retrieve first defined value for 'name' for given database.
366 // Will be extended to take into account user associated with session
// 'offset' indexes db->settings. An offset at or beyond db->num_settings,
// or an empty slot, takes the early exit (the value returned there is not
// visible in this view); otherwise the value of the first entry in the slot
// is returned.
// NOTE(review): negative offsets are not rejected here - confirm callers
// never pass one.
367 const char *session_setting_oneval(struct session_database *db, int offset)
369 if (offset >= db->num_settings || !db->settings[offset])
371 return db->settings[offset]->value;
374 // Prepare XSLT stylesheets for record normalization
375 // Structures are allocated on the session_wide nmem to avoid having
376 // to recompute this for every search. This would lead
377 // to leaking if a single session was to repeatedly change the PZ_XSLT
378 // setting. However, this is not a realistic use scenario.
//
// Work is only done when sdb has settings and no map yet. When the PZ_XSLT
// value is "auto", a stylesheet name "<pz:requestsyntax>.xsl" is built in a
// 256-byte local buffer and its characters are post-processed (the
// per-character action is not visible here); without a request syntax a
// warning is logged. The map comes from the session's normalize cache.
379 static int prepare_map(struct session *se, struct session_database *sdb)
381 if (sdb->settings && !sdb->map)
385 if (sdb->settings[PZ_XSLT] &&
386 (s = session_setting_oneval(sdb, PZ_XSLT)))
388 char auto_stylesheet[256];
390 if (!strcmp(s, "auto"))
392 const char *request_syntax = session_setting_oneval(
393 sdb, PZ_REQUESTSYNTAX);
397 yaz_snprintf(auto_stylesheet, sizeof(auto_stylesheet),
398 "%s.xsl", request_syntax);
399 for (cp = auto_stylesheet; *cp; cp++)
401 /* deliberately only consider ASCII */
402 if (*cp > 32 && *cp < 127)
409 session_log(se, YLOG_WARN,
410 "No pz:requestsyntax for auto stylesheet");
413 sdb->map = normalize_cache_get(se->normalize_cache,
422 // called if watch should be removed because http_channel is to be destroyed
// 'data' is the session_watchentry that session_set_watch() registered with
// http_add_observer().
423 static void session_watch_cancel(void *data, struct http_channel *c,
426 struct session_watchentry *ent = data;
433 // set watch. Returns 0=OK, -1 if watch is already set
// Runs under the session lock. Slot 'what' of s->watchlist is checked for an
// existing callback; for a new watch the callback and its data are stored
// and the slot is registered as observer on 'chan', with
// session_watch_cancel as the handler.
434 int session_set_watch(struct session *s, int what,
435 session_watchfun fun, void *data,
436 struct http_channel *chan)
439 session_enter(s, "session_set_watch");
440 if (s->watchlist[what].fun)
445 s->watchlist[what].fun = fun;
446 s->watchlist[what].data = data;
447 s->watchlist[what].obs = http_add_observer(chan, &s->watchlist[what],
448 session_watch_cancel);
451 session_leave(s, "session_set_watch");
// Fire and clear the watch in slot 'what', if one is set.
// Under the session lock the observer is removed from its http_channel, the
// callback and data are copied to locals and the slot is reset, so the slot
// is already free when the callback runs. The session lock is left before
// the "Alert Watch" debug line; the callback invocation itself is on a line
// not visible in this view. Without a watch the lock is simply left again.
455 void session_alert_watch(struct session *s, int what)
458 session_enter(s, "session_alert_watch");
459 if (s->watchlist[what].fun)
461 /* our watch is no longer associated with http_channel */
463 session_watchfun fun;
465 http_remove_observer(s->watchlist[what].obs);
466 fun = s->watchlist[what].fun;
467 data = s->watchlist[what].data;
469 /* reset watch before fun is invoked - in case fun wants to set
471 s->watchlist[what].fun = 0;
472 s->watchlist[what].data = 0;
473 s->watchlist[what].obs = 0;
475 session_leave(s, "session_alert_watch");
476 session_log(s, YLOG_DEBUG,
477 "Alert Watch: %d calling function: %p", what, fun);
481 session_leave(s,"session_alert_watch");
484 //callback for grep_databases
// The cached client list is searched for a client bound to 'db'. A newly
// created client is bound to db and pushed onto se->clients_cached (the
// condition selecting this path is not visible here). The client is then
// attached to the session and pushed onto se->clients_active.
485 static void select_targets_callback(struct session *se,
486 struct session_database *db)
489 struct client_list *l;
491 for (l = se->clients_cached; l; l = l->next)
492 if (client_get_database(l->client) == db)
499 cl = client_create(db->database->id);
500 client_set_database(cl, db);
502 l = xmalloc(sizeof(*l));
504 l->next = se->clients_cached;
505 se->clients_cached = l;
507 /* set session always. It may be 0 if client is not active */
508 client_set_session(cl, se);
510 l = xmalloc(sizeof(*l));
512 l->next = se->clients_active;
513 se->clients_active = l;
// Replace the session's active client list with new_list.
// The swap happens under the session lock. Afterwards the clients of the
// old list are marked inactive (session set to 0), each under its own
// client lock.
516 static void session_reset_active_clients(struct session *se,
517 struct client_list *new_list)
519 struct client_list *l;
521 session_enter(se, "session_reset_active_clients");
522 l = se->clients_active;
523 se->clients_active = new_list;
524 session_leave(se, "session_reset_active_clients");
// next pointer is saved up front while walking the detached list
528 struct client_list *l_next = l->next;
530 client_lock(l->client);
531 client_set_session(l->client, 0); /* mark client inactive */
532 client_unlock(l->client);
// Drop all clients of the session.
// The active list is reset first. Then the cached list is detached under the
// session lock and each of its clients is unbound from session and database
// (under the client lock) and destroyed.
539 static void session_remove_cached_clients(struct session *se)
541 struct client_list *l;
543 session_reset_active_clients(se, 0);
545 session_enter(se, "session_remove_cached_clients");
546 l = se->clients_cached;
547 se->clients_cached = 0;
548 session_leave(se, "session_remove_cached_clients");
552 struct client_list *l_next = l->next;
553 client_lock(l->client);
554 client_set_session(l->client, 0);
555 client_set_database(l->client, 0);
556 client_unlock(l->client);
557 client_destroy(l->client);
563 // Associates a set of clients with a session;
564 // Note: Session-databases represent databases with per-session
// Returns whatever session_grep_databases() returns for 'filter', with
// select_targets_callback as the callback.
566 static int select_targets(struct session *se, const char *filter)
568 return session_grep_databases(se, filter, select_targets_callback);
// Tests every client on s->clients_active with client_is_active()
// (presumably counting the matches - the tally and the return statement are
// not visible in this view).
571 int session_active_clients(struct session *s)
573 struct client_list *l;
576 for (l = s->clients_active; l; l = l->next)
577 if (client_is_active(l->client))
// Tests every client on s->clients_active with client_is_active_preferred()
// and logs the resulting number 'res' of active preferred clients at debug
// level. The tally and the return expression are not visible in this view.
583 int session_is_preferred_clients_ready(struct session *s)
585 struct client_list *l;
588 for (l = s->clients_active; l; l = l->next)
589 if (client_is_active_preferred(l->client))
591 session_log(s, YLOG_DEBUG, "Has %d active preferred clients.", res);
// Throw away the current result set and start a fresh one.
// The record list is destroyed, the per-search nmem is reset (its usage is
// logged first when non-zero), the record counters are zeroed and relevance
// state is cleared. sorted_results is re-created on se->nmem with 'sp' as
// its single entry, and a new record list is created on se->nmem.
595 static void session_clear_set(struct session *se, struct reclist_sortparms *sp)
597 reclist_destroy(se->reclist);
598 if (nmem_total(se->nmem))
599 session_log(se, YLOG_DEBUG, "NMEN operation usage %zd",
600 nmem_total(se->nmem));
601 nmem_reset(se->nmem);
602 se->total_records = se->total_merged = 0;
604 relevance_clear(se->relevance);
606 /* reset list of sorted results and clear to relevance search */
607 se->sorted_results = nmem_malloc(se->nmem, sizeof(*se->sorted_results));
608 se->sorted_results->name = nmem_strdup(se->nmem, sp->name);
609 se->sorted_results->increasing = sp->increasing;
610 se->sorted_results->type = sp->type;
611 se->sorted_results->next = 0;
613 session_log(se, YLOG_DEBUG, "clear_set session_sort: field=%s increasing=%d type=%d configured",
614 sp->name, sp->increasing, sp->type);
616 se->reclist = reclist_create(se->nmem);
// Apply the sort parameters 'sp' (and optionally a new mergekey or rank) to
// the session.
// A rank or mergekey that differs from the stored one always forces a
// re-search. Otherwise, when an equal entry is already present in
// se->sorted_results, nothing more is done. Each active client is asked via
// client_parse_sort() whether the sort requires a re-search; when none does,
// or a start is already in progress, the new sort is only recorded in
// se->sorted_results. Else the result set is cleared and the search is
// restarted on clients in state Connecting, Idle or Working, with
// se->clients_starting set for the duration. Client searches are started
// outside the session lock.
619 void session_sort(struct session *se, struct reclist_sortparms *sp,
620 const char *mergekey, const char *rank)
622 struct client_list *l;
623 const char *field = sp->name;
624 int increasing = sp->increasing;
626 int clients_research = 0;
628 session_enter(se, "session_sort");
629 session_log(se, YLOG_DEBUG, "session_sort field=%s increasing=%d type=%d",
630 field, increasing, type);
632 if (rank && (!se->rank || strcmp(se->rank, rank)))
634 /* new rank must research/reingest anyway */
// an empty rank string is stored as 0
637 se->rank = *rank ? xstrdup(rank) : 0;
638 clients_research = 1;
639 session_log(se, YLOG_DEBUG, "session_sort: new rank = %s",
642 if (mergekey && (!se->mergekey || strcmp(se->mergekey, mergekey)))
644 /* new mergekey must research/reingest anyway */
// an empty mergekey string is stored as 0
647 se->mergekey = *mergekey ? xstrdup(mergekey) : 0;
648 clients_research = 1;
649 session_log(se, YLOG_DEBUG, "session_sort: new mergekey = %s",
652 if (clients_research == 0)
654 struct reclist_sortparms *sr;
655 for (sr = se->sorted_results; sr; sr = sr->next)
656 if (!reclist_sortparms_cmp(sr, sp))
660 session_log(se, YLOG_DEBUG, "session_sort: field=%s increasing=%d type=%d already fetched",
661 field, increasing, type);
662 session_leave(se, "session_sort");
666 session_log(se, YLOG_DEBUG, "session_sort: field=%s increasing=%d type=%d must fetch",
667 field, increasing, type);
669 // We need to reset reclist on every sort that changes the records, not just for position
670 // So if just one client requires new searching, we need to clear set.
671 // Ask each of the client if sorting requires re-search due to native sort
672 // If it does it will require us to
673 for (l = se->clients_active; l; l = l->next)
675 struct client *cl = l->client;
676 // Assume no re-search is required.
677 client_parse_init(cl, 1);
678 clients_research += client_parse_sort(cl, sp);
680 if (!clients_research || se->clients_starting)
682 // A new sorting based on same record set
683 struct reclist_sortparms *sr = nmem_malloc(se->nmem, sizeof(*sr));
684 sr->name = nmem_strdup(se->nmem, field);
685 sr->increasing = increasing;
687 sr->next = se->sorted_results;
688 se->sorted_results = sr;
689 session_log(se, YLOG_DEBUG, "session_sort: no research/ingesting done");
690 session_leave(se, "session_sort");
694 se->clients_starting = 1;
695 session_log(se, YLOG_DEBUG,
696 "session_sort: reset results due to %d clients researching",
698 session_clear_set(se, sp);
699 session_log(se, YLOG_DEBUG, "Re- search/ingesting for clients due to change in sort order");
701 session_leave(se, "session_sort");
702 for (l = se->clients_active; l; l = l->next)
704 struct client *cl = l->client;
705 if (client_get_state(cl) == Client_Connecting ||
706 client_get_state(cl) == Client_Idle ||
707 client_get_state(cl) == Client_Working) {
708 client_start_search(cl);
712 session_log(se, YLOG_DEBUG,
713 "session_sort: %s: No re-start/ingest in show. "
714 "Wrong client state: %d",
715 client_get_id(cl), client_get_state(cl));
// the starting flag is cleared again under the session lock
718 session_enter(se, "session_sort");
719 se->clients_starting = 0;
720 session_leave(se, "session_sort");
// Stop the current search of a session.
// When se->clients_starting is already set the lock is left again at once
// (the early return is presumably on a line not visible here). Otherwise
// the flag is raised, all four session watches are alerted, the active
// clients are visited (the per-client action is not visible in this view)
// and the flag is cleared again under the session lock.
724 void session_stop(struct session *se)
726 struct client_list *l;
727 session_enter(se, "session_stop1");
728 if (se->clients_starting)
730 session_leave(se, "session_stop1");
733 se->clients_starting = 1;
734 session_leave(se, "session_stop1");
736 session_alert_watch(se, SESSION_WATCH_SHOW);
737 session_alert_watch(se, SESSION_WATCH_BYTARGET);
738 session_alert_watch(se, SESSION_WATCH_TERMLIST);
739 session_alert_watch(se, SESSION_WATCH_SHOW_PREF);
741 for (l = se->clients_active; l; l = l->next)
743 struct client *cl = l->client;
746 session_enter(se, "session_stop2");
747 se->clients_starting = 0;
748 session_leave(se, "session_stop2");
// Start a new search on the targets selected by 'filter'.
// se->clients_starting guards against concurrent starts: when it is already
// set, PAZPAR2_NO_ERROR is returned right away. Cached clients are removed
// when the settings were modified; the active list is reset. Under the
// session lock the mergekey and rank are stored, the result set is cleared,
// relevance data is destroyed, targets are selected and the facet limits are
// rebuilt from 'limit'. All four watches are then alerted, and each active
// client gets its map prepared and its query, range and sort parsed before
// its search is started.
// Returns PAZPAR2_NO_ERROR, PAZPAR2_NO_TARGETS or
// PAZPAR2_MALFORMED_PARAMETER_VALUE.
// NOTE(review): on the two early error exits se->clients_starting is
// cleared after session_leave(), i.e. outside the session lock.
751 enum pazpar2_error_code session_search(struct session *se,
753 const char *startrecs,
757 const char **addinfo,
758 const char **addinfo2,
759 struct reclist_sortparms *sp,
760 const char *mergekey,
763 int live_channels = 0;
765 int no_failed_query = 0;
766 int no_failed_limit = 0;
767 struct client_list *l;
769 session_log(se, YLOG_DEBUG, "Search");
773 session_enter(se, "session_search0");
774 if (se->clients_starting)
776 session_leave(se, "session_search0");
777 return PAZPAR2_NO_ERROR;
779 se->clients_starting = 1;
780 session_leave(se, "session_search0");
782 if (se->settings_modified) {
783 session_remove_cached_clients(se);
786 session_reset_active_clients(se, 0);
788 session_enter(se, "session_search");
789 se->settings_modified = 0;
// empty mergekey / rank strings are stored as 0
794 se->mergekey = *mergekey ? xstrdup(mergekey) : 0;
799 se->rank = *rank ? xstrdup(rank) : 0;
802 session_clear_set(se, sp);
803 relevance_destroy(&se->relevance);
805 live_channels = select_targets(se, filter);
808 session_leave(se, "session_search");
809 se->clients_starting = 0;
810 return PAZPAR2_NO_TARGETS;
813 facet_limits_destroy(se->facet_limits);
814 se->facet_limits = facet_limits_create(limit);
815 if (!se->facet_limits)
818 session_leave(se, "session_search");
819 se->clients_starting = 0;
820 return PAZPAR2_MALFORMED_PARAMETER_VALUE;
823 session_leave(se, "session_search");
825 session_alert_watch(se, SESSION_WATCH_SHOW);
826 session_alert_watch(se, SESSION_WATCH_BYTARGET);
827 session_alert_watch(se, SESSION_WATCH_TERMLIST);
828 session_alert_watch(se, SESSION_WATCH_SHOW_PREF);
830 for (l = se->clients_active; l; l = l->next)
833 struct client *cl = l->client;
834 client_parse_init(cl, 1);
835 if (prepare_map(se, client_get_database(cl)) < 0)
838 parse_ret = client_parse_query(cl, query, se->facet_limits, addinfo2);
841 else if (parse_ret == -2)
843 else if (parse_ret < 0)
844 no_working++; /* other error, such as bad CCL map */
847 client_parse_range(cl, startrecs, maxrecs);
848 client_parse_sort(cl, sp);
849 client_start_search(cl);
853 session_enter(se, "session_search2");
854 se->clients_starting = 0;
855 session_leave(se, "session_search2");
858 if (no_failed_query > 0)
861 return PAZPAR2_MALFORMED_PARAMETER_VALUE;
863 else if (no_failed_limit > 0)
866 return PAZPAR2_MALFORMED_PARAMETER_VALUE;
869 return PAZPAR2_NO_TARGETS;
871 return PAZPAR2_NO_ERROR;
874 // Creates a new session_database object for a database
// 'context' is the struct session. The new object is allocated on
// se->session_nmem and gets a settings array of db->num_settings slots.
// Each slot is set to the database's own setting list, so the setting
// entries are shared, not copied. The object's next pointer is set to the
// current se->databases.
875 static void session_init_databases_fun(void *context, struct database *db)
877 struct session *se = (struct session *) context;
878 struct session_database *new = nmem_malloc(se->session_nmem, sizeof(*new));
884 assert(db->settings);
885 new->settings = nmem_malloc(se->session_nmem,
886 sizeof(struct settings *) * db->num_settings);
887 new->num_settings = db->num_settings;
888 for (i = 0; i < db->num_settings; i++)
890 struct setting *setting = db->settings[i];
891 new->settings[i] = setting;
893 new->next = se->databases;
897 // Doesn't free memory associated with sdb -- nmem takes care of that
// Called by session_destroy() for every entry of se->databases; the body is
// not visible in this view.
898 static void session_database_destroy(struct session_database *sdb)
903 // Initialize session_database list -- this represents this session's view
904 // of the database list -- subject to modification by the settings ws command
// Passes session_init_databases_fun as the callback to
// predef_grep_databases() for the session's service.
905 void session_init_databases(struct session *se)
908 predef_grep_databases(se, se->service, session_init_databases_fun);
// Create a database for 'id' that inherits the service settings (allocated
// on se->session_nmem), wrap it in a session_database and return the head
// of se->databases, which is then the new entry.
911 // Probably session_init_databases_fun should be refactored instead of
913 static struct session_database *load_session_database(struct session *se,
916 struct database *db = new_database_inherit_settings(id, se->session_nmem, se->service->settings);
917 session_init_databases_fun((void*) se, db);
919 // New sdb is head of se->databases list
920 return se->databases;
923 // Find an existing session database. If not found, load it
// The lookup compares 'id' with the database id of every entry in
// se->databases; load_session_database() is the fallback.
924 static struct session_database *find_session_database(struct session *se,
927 struct session_database *sdb;
929 for (sdb = se->databases; sdb; sdb = sdb->next)
930 if (!strcmp(sdb->database->id, id))
932 return load_session_database(se, id);
935 // Apply a session override to a database
// Runs under the session lock. The session database for 'dbname' is found
// or loaded, and its settings array is expanded to cover the offset created
// for 'name'. se->settings_modified is set (whether only for PZ_XSLT cannot
// be told from this view). The slot is searched for an entry with the same
// name and target; a new entry is allocated on se->session_nmem and put
// first in the slot (the condition selecting this path is not visible).
// The value is duplicated onto se->session_nmem.
936 void session_apply_setting(struct session *se, const char *dbname,
937 const char *name, const char *value)
939 session_enter(se, "session_apply_setting");
941 struct session_database *sdb = find_session_database(se, dbname);
942 struct conf_service *service = se->service;
944 int offset = settings_create_offset(service, name);
946 expand_settings_array(&sdb->settings, &sdb->num_settings, offset,
948 // Force later recompute of settings-driven data structures
949 // (happens when a search starts and client connections are prepared)
950 if (offset == PZ_XSLT)
952 se->settings_modified = 1;
953 for (s = sdb->settings[offset]; s; s = s->next)
954 if (!strcmp(s->name, name) &&
955 dbname && s->target && !strcmp(dbname, s->target))
959 s = nmem_malloc(se->session_nmem, sizeof(*s));
961 s->target = nmem_strdup(se->session_nmem, dbname);
962 s->name = nmem_strdup(se->session_nmem, name);
963 s->next = sdb->settings[offset];
964 sdb->settings[offset] = s;
966 s->value = nmem_strdup(se->session_nmem, value);
968 session_leave(se, "session_apply_setting");
// Tear down a session.
// Cached (and active) clients are removed, each session database is passed
// to session_database_destroy(), and the normalize cache, relevance data,
// record list and facet limits are destroyed. Non-zero usage of both nmems
// is logged at debug level before the per-search nmem is destroyed.
// Finally the service is destroyed and the session mutex is released.
971 void session_destroy(struct session *se)
973 struct session_database *sdb;
974 session_log(se, YLOG_LOG, "destroy");
976 session_remove_cached_clients(se);
978 for (sdb = se->databases; sdb; sdb = sdb->next)
979 session_database_destroy(sdb);
980 normalize_cache_destroy(se->normalize_cache);
981 relevance_destroy(&se->relevance);
982 reclist_destroy(se->reclist);
985 if (nmem_total(se->nmem))
986 session_log(se, YLOG_DEBUG, "NMEN operation usage %zd", nmem_total(se->nmem));
987 if (nmem_total(se->session_nmem))
988 session_log(se, YLOG_DEBUG, "NMEN session usage %zd", nmem_total(se->session_nmem));
989 facet_limits_destroy(se->facet_limits);
990 nmem_destroy(se->nmem);
991 service_destroy(se->service);
992 yaz_mutex_destroy(&se->session_mutex);
// Return nmem_total() of the session's per-search nmem (session->nmem),
// read while holding the session lock.
995 size_t session_get_memory_status(struct session *session) {
999 session_enter(session, "session_get_memory_status");
1000 session_nmem = nmem_total(session->nmem);
1001 session_leave(session, "session_get_memory_status");
1002 return session_nmem;
// Allocate and initialize a session on 'nmem' for 'service'.
// The session structure and its first record list live on 'nmem', which is
// also kept as session->session_nmem; a separate nmem is created for
// session->nmem. Counters, lists and flags start out as 0, the watch slots
// 0 .. SESSION_WATCH_MAX are cleared, a normalize cache is created and a
// session mutex named "session#<session_id>" is created.
1006 struct session *new_session(NMEM nmem, struct conf_service *service,
1007 unsigned session_id)
1010 struct session *session = nmem_malloc(nmem, sizeof(*session));
// tmp_str becomes the name handed to pazpar2_mutex_create() below
1014 sprintf(tmp_str, "session#%u", session_id);
1016 session->session_id = session_id;
1017 session_log(session, YLOG_DEBUG, "New");
1018 session->service = service;
1019 session->relevance = 0;
1020 session->total_records = 0;
1021 session->number_of_warnings_unknown_elements = 0;
1022 session->number_of_warnings_unknown_metadata = 0;
1023 session->termlists = 0;
1024 session->reclist = reclist_create(nmem);
1025 session->clients_active = 0;
1026 session->clients_cached = 0;
1027 session->settings_modified = 0;
1028 session->session_nmem = nmem;
1029 session->nmem = nmem_create();
1030 session->databases = 0;
1031 session->sorted_results = 0;
1032 session->facet_limits = 0;
1033 session->mergekey = 0;
1035 session->clients_starting = 0;
// note the inclusive upper bound: SESSION_WATCH_MAX is itself a valid slot
1037 for (i = 0; i <= SESSION_WATCH_MAX; i++)
1039 session->watchlist[i].data = 0;
1040 session->watchlist[i].fun = 0;
1042 session->normalize_cache = normalize_cache_create();
1043 session->session_mutex = 0;
1044 pazpar2_mutex_create(&session->session_mutex, tmp_str);
1045 session_log(session, YLOG_LOG, "create");
// Forward declaration; hitsbytarget_nb() calls this to obtain the
// suggestions XML of a client.
1051 const char * client_get_suggestions_xml(struct client *cl, WRBUF wrbuf);
// Build an array of hitsbytarget entries for the active clients, allocated
// on 'nmem'; *count serves as the running index while the array is filled.
// Each entry carries id, name, hits, approximation, record counts,
// diagnostic, state, connection flag and the settings and suggestions XML
// of one client. The session lock is not taken here; get_hitsbytarget()
// wraps this function in session_enter/session_leave.
// NOTE(review): *name is dereferenced without a null check - confirm that
// session_setting_oneval() never returns a null pointer.
1053 static struct hitsbytarget *hitsbytarget_nb(struct session *se,
1054 int *count, NMEM nmem)
1056 struct hitsbytarget *res = 0;
1057 struct client_list *l;
1060 for (l = se->clients_active; l; l = l->next)
1063 res = nmem_malloc(nmem, sizeof(*res) * sz);
1065 for (l = se->clients_active; l; l = l->next)
1067 struct client *cl = l->client;
1068 WRBUF w = wrbuf_alloc();
1069 const char *name = session_setting_oneval(client_get_database(cl),
1071 res[*count].id = client_get_id(cl);
// an empty name is replaced by "Unknown"
1072 res[*count].name = *name ? name : "Unknown";
1073 res[*count].hits = client_get_hits(cl);
1074 res[*count].approximation = client_get_approximation(cl);
1075 res[*count].records = client_get_num_records(cl,
1076 &res[*count].filtered,
1078 res[*count].diagnostic =
1079 client_get_diagnostic(cl, &res[*count].message,
1080 &res[*count].addinfo);
1081 res[*count].state = client_get_state_str(cl);
1082 res[*count].connected = client_get_connection(cl) ? 1 : 0;
1083 session_settings_dump(se, client_get_database(cl), w);
// strings are copied to nmem so they do not depend on the temporary WRBUF
1084 res[*count].settings_xml = nmem_strdup(nmem, wrbuf_cstr(w));
1087 res[*count].suggestions_xml = nmem_strdup(nmem, client_get_suggestions_xml(cl, w));
// Locked wrapper around hitsbytarget_nb(): the array is built while the
// session lock is held.
1094 struct hitsbytarget *get_hitsbytarget(struct session *se, int *count, NMEM nmem)
1096 struct hitsbytarget *p;
1097 session_enter(se, "get_hitsbytarget");
1098 p = hitsbytarget_nb(se, count, nmem);
1099 session_leave(se, "get_hitsbytarget");
1103 // Compares two hitsbytarget nodes by hitcount
1104 static int cmp_ht(const void *p1, const void *p2)
1106 const struct hitsbytarget *h1 = p1;
1107 const struct hitsbytarget *h2 = p2;
1108 return h2->hits - h1->hits;
1111 // Compares two hitsbytarget nodes by hitcount
1112 static int cmp_ht_approx(const void *p1, const void *p2)
1114 const struct hitsbytarget *h1 = p1;
1115 const struct hitsbytarget *h2 = p2;
1116 return h2->approximation - h1->approximation;
// Write one <term> element per target to wrbuf.
// The entries come from hitsbytarget_nb() and are sorted with qsort() by
// approximation or by hit count (the condition choosing between the two
// calls is not visible here). Output stops after 'num' entries or at the
// first entry whose hit count is not positive. The session lock is not
// taken here.
1119 static int targets_termlist_nb(WRBUF wrbuf, struct session *se, int num,
1120 NMEM nmem, int version)
1122 struct hitsbytarget *ht;
1125 ht = hitsbytarget_nb(se, &count, nmem);
1127 qsort(ht, count, sizeof(struct hitsbytarget), cmp_ht_approx);
1129 qsort(ht, count, sizeof(struct hitsbytarget), cmp_ht);
1130 for (i = 0; i < count && i < num && ht[i].hits > 0; i++)
1133 // do only print terms which have display names
1135 wrbuf_puts(wrbuf, "<term>\n");
1137 wrbuf_puts(wrbuf, "<id>");
1138 wrbuf_xmlputs(wrbuf, ht[i].id);
1139 wrbuf_puts(wrbuf, "</id>\n");
1141 wrbuf_puts(wrbuf, "<name>");
1142 if (!ht[i].name || !ht[i].name[0])
1143 wrbuf_xmlputs(wrbuf, "NO TARGET NAME");
1145 wrbuf_xmlputs(wrbuf, ht[i].name);
1146 wrbuf_puts(wrbuf, "</name>\n");
1148 wrbuf_printf(wrbuf, "<frequency>" ODR_INT_PRINTF "</frequency>\n",
1152 // Should not print if we know it isn't an approximation.
1153 wrbuf_printf(wrbuf, "<approximation>" ODR_INT_PRINTF "</approximation>\n", ht[i].approximation);
// <records> excludes the filtered ones, which are reported separately
1154 wrbuf_printf(wrbuf, "<records>%d</records>\n", ht[i].records - ht[i].filtered);
1155 wrbuf_printf(wrbuf, "<filtered>%d</filtered>\n", ht[i].filtered);
1158 wrbuf_puts(wrbuf, "<state>");
1159 wrbuf_xmlputs(wrbuf, ht[i].state);
1160 wrbuf_puts(wrbuf, "</state>\n");
1162 wrbuf_printf(wrbuf, "<diagnostic>%d</diagnostic>\n",
1164 wrbuf_puts(wrbuf, "</term>\n");
// Write the term lists named in the comma-separated 'name' to c->wrbuf.
// Works under the session lock and with a temporary nmem that is destroyed
// at the end. For each requested name, matching session term lists (or all
// of them for "*") are written as <list> elements holding at most 'num'
// <term> entries; terms without a display term are not sent. A second name
// test emits the per-target list through targets_termlist_nb(); the name it
// compares against is assigned on a line not visible here. A requested name
// that produced no list yields an empty <list .../> element.
1169 void perform_termlist(struct http_channel *c, struct session *se,
1170 const char *name, int num, int version)
1173 NMEM nmem_tmp = nmem_create();
1180 nmem_strsplit(nmem_tmp, ",", name, &names, &num_names);
1182 session_enter(se, "perform_termlist");
1184 for (j = 0; j < num_names; j++)
1187 int must_generate_empty = 1; /* bug 5350 */
1189 struct named_termlist *t = se->termlists;
1190 for (; t; t = t->next)
1193 if (!strcmp(names[j], tname) || !strcmp(names[j], "*"))
1195 struct termlist_score **p = 0;
1198 wrbuf_puts(c->wrbuf, "<list name=\"");
1199 wrbuf_xmlputs(c->wrbuf, tname);
1200 wrbuf_puts(c->wrbuf, "\">\n");
1201 must_generate_empty = 0;
1203 p = termlist_highscore(t->termlist, &len, nmem_tmp);
1207 for (i = 0; i < len && i < num; i++)
1209 // prevent sending empty term elements
1210 if (!p[i]->display_term || !p[i]->display_term[0])
1213 wrbuf_puts(c->wrbuf, "<term>");
1214 wrbuf_puts(c->wrbuf, "<name>");
1215 wrbuf_xmlputs(c->wrbuf, p[i]->display_term);
1216 wrbuf_puts(c->wrbuf, "</name>");
1218 wrbuf_printf(c->wrbuf,
1219 "<frequency>%d</frequency>",
1221 wrbuf_puts(c->wrbuf, "</term>\n");
1224 wrbuf_puts(c->wrbuf, "</list>\n");
1228 if (!strcmp(names[j], tname) || !strcmp(names[j], "*"))
1230 wrbuf_puts(c->wrbuf, "<list name=\"");
1231 wrbuf_xmlputs(c->wrbuf, tname);
1232 wrbuf_puts(c->wrbuf, "\">\n");
1234 targets_termlist_nb(c->wrbuf, se, num, c->nmem, version);
1235 wrbuf_puts(c->wrbuf, "</list>\n");
1236 must_generate_empty = 0;
1238 if (must_generate_empty)
1240 wrbuf_puts(c->wrbuf, "<list name=\"");
1241 wrbuf_xmlputs(c->wrbuf, names[j]);
1242 wrbuf_puts(c->wrbuf, "\"/>\n");
1245 session_leave(se, "perform_termlist");
1246 nmem_destroy(nmem_tmp);
// Log the nmem memory figures obtained from nmem_get_memory_in_use() and
// nmem_get_memory_free(). Only compiled when MISSING_HEADERS is defined.
1249 #ifdef MISSING_HEADERS
1250 void report_nmem_stats(void)
1252 size_t in_use, is_free;
1254 nmem_get_memory_in_use(&in_use);
1255 nmem_get_memory_free(&is_free);
// size_t values are cast to long to match the %ld conversions
1257 yaz_log(YLOG_LOG, "nmem stat: use=%ld free=%ld",
1258 (long) in_use, (long) is_free);
// Look up the record cluster with id 'id' in the session's record list.
// Runs under the session lock: the list is limited, entered and scanned,
// and when a record id matches, *next_r is set to the record that follows.
// NOTE(review): show_single_stop() leaves the session lock, so a successful
// lookup presumably returns with the lock still held - confirm against the
// lines not visible here.
1262 struct record_cluster *show_single_start(struct session *se, const char *id,
1263 struct record_cluster **prev_r,
1264 struct record_cluster **next_r)
1266 struct record_cluster *r = 0;
1268 session_enter(se, "show_single_start");
1271 reclist_limit(se->reclist, se, 1);
1273 reclist_enter(se->reclist);
1274 while ((r = reclist_read_record(se->reclist)))
1276 if (!strcmp(r->recid, id))
1278 *next_r = reclist_read_record(se->reclist);
1283 reclist_leave(se->reclist);
1285 session_leave(se, "show_single_start");
// Counterpart of show_single_start(): leaves the session lock. 'rec' is not
// used on the visible lines.
1289 void show_single_stop(struct session *se, struct record_cluster *rec)
1291 session_leave(se, "show_single_stop");
// Ask the idle active clients to fetch more records.
// For a client in state Client_Idle, client_fetch_more() is tried and
// "more to fetch" is logged on success; otherwise its hits, fetched and
// filtered numbers are logged, with a warning when ingest or record
// failures were counted. Clients in any other state are logged as
// "no fetch due to state". The return value is computed on lines not
// visible in this view.
1295 int session_fetch_more(struct session *se)
1297 struct client_list *l;
1300 for (l = se->clients_active; l; l = l->next)
1302 struct client *cl = l->client;
1303 if (client_get_state(cl) == Client_Idle)
1305 if (client_fetch_more(cl))
1307 session_log(se, YLOG_LOG, "%s: more to fetch",
1314 int ingest_failures;
1315 int record_failures;
1316 int num = client_get_num_records(
1317 cl, &filtered, &ingest_failures, &record_failures);
1319 session_log(se, YLOG_LOG, "%s: hits=" ODR_INT_PRINTF
1320 " fetched=%d filtered=%d",
1322 client_get_hits(cl),
1324 if (ingest_failures || record_failures)
1326 session_log(se, YLOG_WARN, "%s:"
1327 " ingest failures=%d record failures=%d",
1329 ingest_failures, record_failures);
1335 session_log(se, YLOG_LOG, "%s: no fetch due to state=%s",
1336 client_get_id(cl), client_get_state_str(cl));
// Produce the slice of *num records starting at position 'start' from the
// sorted record list.
// Takes the session lock, applies limits, prepares relevance data when one
// of the sort parameters has type Metadata_type_relevance, adds the hits and
// approximations of all active clients to *sumhits and *approx_hits, sorts
// the record list and stores the total in *total. The first 'start' records
// are read and skipped, then up to *num records are read into an array
// allocated on se->nmem; per-client show statistics are updated on the way.
// The elapsed time is logged. Under conditions partly not visible here,
// session_fetch_more() is tried; when fetching is in progress the range is
// released with show_range_stop() and a SESSION_WATCH_SHOW watch with
// show_records_ready is set on 'chan'.
// show_range_stop() leaves the session lock taken here.
1343 struct record_cluster **show_range_start(struct session *se,
1344 struct reclist_sortparms *sp,
1345 int start, int *num, int *total,
1346 Odr_int *sumhits, Odr_int *approx_hits,
1347 void (*show_records_ready)(void *data),
1348 struct http_channel *chan)
1350 struct record_cluster **recs = 0;
1351 struct reclist_sortparms *spp;
1352 struct client_list *l;
1355 yaz_timing_t t = yaz_timing_create();
1357 session_enter(se, "show_range_start");
1361 reclist_limit(se->reclist, se, 0);
1364 for (spp = sp; spp; spp = spp->next)
1365 if (spp->type == Metadata_type_relevance)
1367 relevance_prepare_read(se->relevance, se->reclist);
1370 for (l = se->clients_active; l; l = l->next) {
1371 *sumhits += client_get_hits(l->client);
1372 *approx_hits += client_get_approximation(l->client);
1375 reclist_sort(se->reclist, sp);
1377 reclist_enter(se->reclist);
1378 *total = reclist_get_num_records(se->reclist);
// show statistics of every client are updated with 0 before records are read
1380 for (l = se->clients_active; l; l = l->next)
1381 client_update_show_stat(l->client, 0);
1383 for (i = 0; i < start; i++)
1385 struct record_cluster *r = reclist_read_record(se->reclist);
1393 struct record *rec = r->records;
1394 for (;rec; rec = rec->next)
1395 client_update_show_stat(rec->client, 1);
// at least one element is allocated even when *num is not positive
1398 recs = nmem_malloc(se->nmem, (*num > 0 ? *num : 1) * sizeof(*recs));
1399 for (i = 0; i < *num; i++)
1401 struct record_cluster *r = reclist_read_record(se->reclist);
1409 struct record *rec = r->records;
1410 for (;rec; rec = rec->next)
1411 client_update_show_stat(rec->client, 1);
1415 reclist_leave(se->reclist);
1418 session_log(se, YLOG_LOG, "show %6.5f %3.2f %3.2f",
1419 yaz_timing_get_real(t), yaz_timing_get_user(t),
1420 yaz_timing_get_sys(t));
1421 yaz_timing_destroy(&t);
1424 if (!session_fetch_more(se))
1425 session_log(se, YLOG_LOG, "can not fetch more");
1428 show_range_stop(se, recs);
1429 session_log(se, YLOG_LOG, "fetching more in progress");
1430 if (session_set_watch(se, SESSION_WATCH_SHOW,
1431 show_records_ready, chan, chan))
1433 session_log(se, YLOG_WARN, "Ignoring show block");
// the lock released by show_range_stop() above is taken again here
1434 session_enter(se, "show_range_start");
1438 session_log(se, YLOG_LOG, "session watch OK");
/* Leave the session with the tag "show_range_stop"; this pairs with the
 * session_enter() done by show_range_start().
 * recs is not referenced in the visible body. */
1445 void show_range_stop(struct session *se, struct record_cluster **recs)
1447 session_leave(se, "show_range_stop");
/* Fill *stat with counters describing the session's active clients.
 *
 * The structure is zeroed first.  For each client in se->clients_active the
 * function counts clients without a connection, adds the client's hit count
 * to num_hits and bumps the counter matching the client state.
 * num_records is copied from se->total_records.
 *
 * NOTE(review): the declaration and increment of `count` (assigned to
 * num_clients at the end) are not visible in this view; presumably it is
 * the number of active clients -- confirm.
 */
1450 void statistics(struct session *se, struct statistics *stat)
1452 struct client_list *l;
1455 memset(stat, 0, sizeof(*stat));
1457 for (l = se->clients_active; l; l = l->next)
1459 struct client *cl = l->client;
1460 if (!client_get_connection(cl))
1461 stat->num_no_connection++;
1462 stat->num_hits += client_get_hits(cl);
/* One counter per client state handled below. */
1463 switch (client_get_state(cl))
1465 case Client_Connecting: stat->num_connecting++; break;
1466 case Client_Working: stat->num_working++; break;
1467 case Client_Idle: stat->num_idle++; break;
1468 case Client_Failed: stat->num_failed++; break;
1469 case Client_Error: stat->num_error++; break;
1474 stat->num_records = se->total_records;
1476 stat->num_clients = count;
/* Create a record_metadata item from a text value and its XML attributes.
 *
 * nmem   memory pool used for the item and every string copied into it
 * value  text content of the metadata element
 * type   configured metadata type; selects how `value` is interpreted
 * attr   first XML attribute of the element (linked via ->next)
 *
 * NOTE(review): the return statements are not visible in this view.  One
 * caller logs "bad metadata data" right after calling this function, so a
 * null result presumably signals a value that could not be parsed --
 * confirm.
 */
1479 static struct record_metadata *record_metadata_init(
1480 NMEM nmem, const char *value, enum conf_metadata_type type,
1481 struct _xmlAttr *attr)
1483 struct record_metadata *rec_md = record_metadata_create(nmem);
1484 struct record_metadata_attr **attrp = &rec_md->attributes;
/* Copy attributes that have content into the item's attribute list;
   attrp always points at the link that receives the next entry. */
1486 for (; attr; attr = attr->next)
1488 if (attr->children && attr->children->content)
1490 if (strcmp((const char *) attr->name, "type")
1491 && strcmp((const char *) attr->name, "empty"))
1492 { /* skip the "type" + "empty" attribute..
1493 The "Type" is already part of the element in output
1494 (md-%s) and so repeating it here is redundant */
1495 *attrp = nmem_malloc(nmem, sizeof(**attrp));
1497 nmem_strdup(nmem, (const char *) attr->name);
1499 nmem_strdup(nmem, (const char *) attr->children->content);
1500 attrp = &(*attrp)->next;
/* Type-specific part of the item follows. */
1508 case Metadata_type_generic:
1509 case Metadata_type_skiparticle:
/* Text: values containing "://" are copied verbatim; all others are copied
   and passed to normalize7bit_generic() together with " ,/.:([".
   NOTE(review): presumably that string lists characters to trim -- confirm
   in normalize7bit_generic(). */
1510 if (strstr(value, "://")) /* looks like a URL */
1511 rec_md->data.text.disp = nmem_strdup(nmem, value);
1513 rec_md->data.text.disp =
1514 normalize7bit_generic(nmem_strdup(nmem, value), " ,/.:([");
1515 rec_md->data.text.sort = 0;
1516 rec_md->data.text.snippet = 0;
1518 case Metadata_type_year:
1519 case Metadata_type_date:
/* Year/date: extract7bit_dates() yields a first/last pair that becomes the
   numeric range; a negative result is tested for (the failure action is
   not visible here).
   NOTE(review): the cast drops const from value -- confirm that
   extract7bit_dates() does not write through the pointer. */
1524 if (type == Metadata_type_date)
1526 if (extract7bit_dates((char *) value, &first, &last, longdate) < 0)
1529 rec_md->data.number.min = first;
1530 rec_md->data.number.max = last;
1533 case Metadata_type_float:
/* atof() reports no errors; text that is not a number converts to 0.0. */
1534 rec_md->data.fnumber = atof(value);
/* No type-specific handling is visible for relevance and position. */
1536 case Metadata_type_relevance:
1537 case Metadata_type_position:
/* Append the normalised form of `value` to norm_wr.
 *
 * charsets  charset factory; the "mergekey" profile is used for tokenising
 * norm_wr   output buffer; existing content is kept and appended to
 * value     text to tokenise
 *
 * A single space is written before a token whenever norm_wr already holds
 * something, so tokens end up space-separated.
 * NOTE(review): lines between the loop header and the first wrbuf call are
 * not visible here; there may be an extra condition on the token (such as
 * skipping empty ones) -- confirm.
 */
1543 static void mergekey_norm_wr(pp2_charset_fact_t charsets,
1544 WRBUF norm_wr, const char *value)
1546 const char *norm_str;
1547 pp2_charset_token_t prt =
1548 pp2_charset_token_create(charsets, "mergekey");
1550 pp2_charset_token_first(prt, value, 0);
1551 while ((norm_str = pp2_charset_token_next(prt)))
1555 if (wrbuf_len(norm_wr))
1556 wrbuf_puts(norm_wr, " ");
1557 wrbuf_puts(norm_wr, norm_str);
/* The tokenizer is created and destroyed within this call. */
1560 pp2_charset_token_destroy(prt);
/* Collect merge-key text from the metadata children of `root` whose type
 * attribute equals `name`.
 *
 * doc      document owning the nodes (needed by xmlNodeListGetString)
 * root     element whose direct children are scanned
 * name     metadata type to look for
 * service  supplies the charset factory used for normalisation
 * norm_wr  output buffer; for each non-empty match the field name and then
 *          the normalised value are appended, space-separated
 *
 * NOTE(review): the return statement is not visible in this view.
 * get_mergekey() treats a result of 0 as "nothing found for this field",
 * so the value is presumably a match count -- confirm.
 * NOTE(review): xmlGetProp() and xmlNodeListGetString() return strings the
 * caller must release with xmlFree(); those calls are not visible here.
 */
1563 static int get_mergekey_from_doc(xmlDoc *doc, xmlNode *root, const char *name,
1564 struct conf_service *service, WRBUF norm_wr)
1568 for (n = root->children; n; n = n->next)
/* Only element nodes named "metadata" are of interest. */
1570 if (n->type != XML_ELEMENT_NODE)
1572 if (!strcmp((const char *) n->name, "metadata"))
1574 xmlChar *type = xmlGetProp(n, (xmlChar *) "type");
/* A metadata element without a type attribute is reported; per the message
   text the element is skipped. */
1576 yaz_log(YLOG_FATAL, "Missing type attribute on metadata element. Skipping!");
1578 else if (!strcmp(name, (const char *) type))
1580 xmlChar *value = xmlNodeListGetString(doc, n->children, 1);
1581 if (value && *value)
/* Separate this contribution from whatever is already in the buffer. */
1583 if (wrbuf_len(norm_wr) > 0)
1584 wrbuf_puts(norm_wr, " ");
1585 wrbuf_puts(norm_wr, name);
1586 mergekey_norm_wr(service->charsets, norm_wr,
1587 (const char *) value);
/* Build the normalised merge key for one record.
 *
 * doc, root         the record document and the element to inspect
 * cl, record_no     used to form a unique fallback key
 * service           metadata configuration and charset factory
 * nmem              pool for the split field list and the returned string
 * session_mergekey  optional comma-separated list of metadata names
 *
 * Key sources, in the order they are tried:
 *   1. session_mergekey: each listed field is collected from the document;
 *   2. a "mergekey" attribute on root, normalised as a whole;
 *   3. every service metadata field whose mergekey setting is not
 *      Metadata_mergekey_no.
 * An empty result becomes "position: <client id>-<record_no>"; a non-empty
 * one is prefixed with "content: ".  The key is returned as a copy
 * allocated from nmem.
 *
 * NOTE(review): the xmlFree() for the attribute fetched in step 2 and the
 * loop exit after the rewind in step 3 are not visible in this view.
 */
1599 static const char *get_mergekey(xmlDoc *doc, xmlNode *root,
1600 struct client *cl, int record_no,
1601 struct conf_service *service, NMEM nmem,
1602 const char *session_mergekey)
1604 char *mergekey_norm = 0;
1605 WRBUF norm_wr = wrbuf_alloc();
1608 if (session_mergekey)
1612 nmem_strsplit_escape2(nmem, ",", session_mergekey, &values,
1615 for (i = 0; i < num; i++)
1616 get_mergekey_from_doc(doc, root, values[i], service, norm_wr);
1618 else if ((mergekey = xmlGetProp(root, (xmlChar *) "mergekey")))
1620 mergekey_norm_wr(service->charsets, norm_wr, (const char *) mergekey);
1625 /* no mergekey defined in XSL. Look for mergekey metadata instead */
1627 for (field_id = 0; field_id < service->num_metadata; field_id++)
1629 struct conf_metadata *ser_md = &service->metadata[field_id];
1630 if (ser_md->mergekey != Metadata_mergekey_no)
1632 int r = get_mergekey_from_doc(doc, root, ser_md->name,
1634 if (r == 0 && ser_md->mergekey == Metadata_mergekey_required)
1636 /* no mergekey on this one and it is required..
1637 Generate unique key instead */
1638 wrbuf_rewind(norm_wr);
1645 /* generate a unique key if none was produced above or it is empty */
1646 if (wrbuf_len(norm_wr) == 0)
1648 wrbuf_printf(norm_wr, "position: %s-%d",
1649 client_get_id(cl), record_no);
/* Content-derived keys get a prefix, so they differ in form from the
   "position: " fallback keys. */
1653 const char *lead = "content: ";
1654 wrbuf_insert(norm_wr, 0, lead, strlen(lead));
/* Both visible paths above leave the buffer non-empty. */
1656 if (wrbuf_len(norm_wr) > 0)
1657 mergekey_norm = nmem_strdup(nmem, wrbuf_cstr(norm_wr));
1658 wrbuf_destroy(norm_wr);
1659 return mergekey_norm;
1662 /** \brief see if metadata for pz:recordfilter exists
1663 \param root xml root element of normalized record
1664 \param sdb session database for client
1665 \retval 0 if there is no metadata for pz:recordfilter
1666 \retval 1 if there is metadata for pz:recordfilter
1668 If there is no pz:recordfilter defined, this function returns 1
/* Test a normalised record against the PZ_RECORDFILTER setting of its
 * database.
 *
 * The setting names a metadata type, optionally followed by '=' and a
 * value (exact comparison) or '~' and a value (substring search).
 *
 * NOTE(review): the early exit for an unset filter, the result variable and
 * the return statement are not visible in this view, and neither are any
 * null check on `type` or the xmlFree() calls for `type` and `value`.
 */
1672 static int check_record_filter(xmlNode *root, struct session_database *sdb)
1677 s = session_setting_oneval(sdb, PZ_RECORDFILTER);
1682 for (n = root->children; n; n = n->next)
1684 if (n->type != XML_ELEMENT_NODE)
1686 if (!strcmp((const char *) n->name, "metadata"))
1688 xmlChar *type = xmlGetProp(n, (xmlChar *) "type");
/* '=' is looked for first, then '~'; eq points at the operator found. */
1695 if ((eq = strchr(s, '=')))
1697 else if ((eq = strchr(s, '~')))
/* The filter's name part must equal the element's type: same length and
   same bytes. */
1703 if (len == strlen((const char *)type) &&
1704 !memcmp((const char *) type, s, len))
1706 xmlChar *value = xmlNodeGetContent(n);
1707 if (value && *value)
/* '~' filters use strstr() on the value, '=' filters use strcmp(). */
1710 (substring && strstr((const char *) value, eq+1)) ||
1711 (!substring && !strcmp((const char *) value, eq + 1)))
/* Forward declaration: ingest_to_cluster() is defined further down in this
   file and called from ingest_sub_record(). */
1723 static int ingest_to_cluster(struct client *cl,
1727 struct record_metadata_attr *mergekey);
/* Ingest one record element: apply the record filter, then add the record
 * to a cluster while the session is entered.
 *
 * cl         client the record was received from
 * xdoc,root  document and the record element inside it
 * record_no  position of the record in the client's result set
 * nmem       not referenced in the visible body
 * sdb        session database; supplies the filter setting and the log id
 * mergekeys  merge keys handed on to ingest_to_cluster()
 *
 * NOTE(review): the declaration of `ret` and the return statements are not
 * visible in this view, so the value returned for a filtered-out record
 * cannot be stated from here.
 */
1729 static int ingest_sub_record(struct client *cl, xmlDoc *xdoc, xmlNode *root,
1730 int record_no, NMEM nmem,
1731 struct session_database *sdb,
1732 struct record_metadata_attr *mergekeys)
1735 struct session *se = client_get_session(cl);
1737 if (!check_record_filter(root, sdb))
1739 session_log(se, YLOG_LOG,
1740 "Filtered out record no %d from %s",
1741 record_no, sdb->database->id);
1744 session_enter(se, "ingest_sub_record");
/* The client's session is looked up again after entering, and
   se->relevance must be set.
   NOTE(review): presumably this guards against the client having been
   moved to another session or the search being reset meanwhile --
   confirm. */
1745 if (client_get_session(cl) == se && se->relevance)
1746 ret = ingest_to_cluster(cl, xdoc, root, record_no, mergekeys);
1747 session_leave(se, "ingest_sub_record");
1752 /** \brief ingest XML record
1753 \param cl client holds the result set for record
1754 \param rec record buffer (0 terminated)
1755 \param record_no record position (1, 2, ..)
1756 \param nmem working NMEM
/* Normalise a raw record buffer with normalize_record(), ingest the
   resulting document with cached_copy set to 0, and then hand the document
   to client_store_xdoc() for this record position.
   NOTE(review): no null check on xdoc is visible before it is passed on,
   and the return statement is not visible either; presumably `r` is
   returned -- confirm. */
1761 int ingest_record(struct client *cl, const char *rec,
1762 int record_no, NMEM nmem)
1764 struct session *se = client_get_session(cl);
1765 struct session_database *sdb = client_get_database(cl);
1766 struct conf_service *service = se->service;
1767 xmlDoc *xdoc = normalize_record(se, sdb, service, rec, nmem);
1768 int r = ingest_xml_record(cl, xdoc, record_no, nmem, 0);
1769 client_store_xdoc(cl, record_no, xdoc);
/* Ingest an already normalised XML document.
 *
 * cl           client the record belongs to
 * xdoc         normalised document; its root decides the handling
 * record_no    position of the record in the client's result set
 * nmem         pool for merge-key strings and list nodes
 * cached_copy  not referenced in the visible lines
 *
 * Root element "cluster": the merge keys of all "record" children are
 * gathered into one list without duplicates, then each "record" child is
 * ingested with that list.
 * Root element "record": one merge key is computed and the record ingested.
 * Any other root element is reported with a warning.
 *
 * NOTE(review): the handling of a null xdoc, the use of cached_copy and the
 * return statements are not visible in this view.
 */
1773 int ingest_xml_record(struct client *cl, xmlDoc *xdoc,
1774 int record_no, NMEM nmem, int cached_copy)
1776 struct session *se = client_get_session(cl);
1777 struct session_database *sdb = client_get_database(cl);
1778 struct conf_service *service = se->service;
1784 if (global_parameters.dump_records)
1786 session_log(se, YLOG_LOG, "Normalized record from %s",
1791 root = xmlDocGetRootElement(xdoc);
1793 if (!strcmp((const char *) root->name, "cluster"))
1795 int no_merge_keys = 0;
1796 int no_merge_dups = 0;
1798 struct record_metadata_attr *mk = 0;
/* First pass over the children: compute a merge key per "record" element
   and append it to the list mk unless an equal key is already present. */
1800 for (sroot = root->children; sroot; sroot = sroot->next)
1801 if (sroot->type == XML_ELEMENT_NODE &&
1802 !strcmp((const char *) sroot->name, "record"))
1804 struct record_metadata_attr **mkp;
1805 const char *mergekey_norm =
1806 get_mergekey(xdoc, sroot, cl, record_no, service, nmem,
/* Walk the list comparing keys; mkp ends on the link to append at. */
1813 for (mkp = &mk; *mkp; mkp = &(*mkp)->next)
1814 if (!strcmp((*mkp)->value, mergekey_norm))
1818 *mkp = (struct record_metadata_attr*)
1819 nmem_malloc(nmem, sizeof(**mkp));
1821 (*mkp)->value = nmem_strdup(nmem, mergekey_norm);
1828 if (no_merge_keys > 1 || no_merge_dups > 0)
1830 yaz_log(YLOG_LOG, "Got %d mergekeys, %d dups for position %d",
1831 no_merge_keys, no_merge_dups, record_no);
/* Second pass: ingest each "record" child; the loop stops at the first
   non-zero result. */
1833 for (sroot = root->children; !r && sroot; sroot = sroot->next)
1834 if (sroot->type == XML_ELEMENT_NODE &&
1835 !strcmp((const char *) sroot->name, "record"))
/* NOTE(review): the settings are inserted on the cluster root (not on
   sroot), once per "record" child -- confirm that this is intended. */
1838 insert_settings_values(sdb, xdoc, root, service);
1839 r = ingest_sub_record(cl, xdoc, sroot, record_no, nmem, sdb,
1843 else if (!strcmp((const char *) root->name, "record"))
1845 const char *mergekey_norm =
1846 get_mergekey(xdoc, root, cl, record_no, service, nmem,
/* A plain record gets a merge-key list with a single entry. */
1850 struct record_metadata_attr *mk = (struct record_metadata_attr*)
1851 nmem_malloc(nmem, sizeof(*mk));
1853 mk->value = nmem_strdup(nmem, mergekey_norm);
1857 insert_settings_values(sdb, xdoc, root, service);
1858 r = ingest_sub_record(cl, xdoc, root, record_no, nmem, sdb, mk);
1863 session_log(se, YLOG_WARN, "Bad pz root element: %s",
1864 (const char *) root->name);
1871 // struct conf_metadata *ser_md = &service->metadata[md_field_id];
1872 // struct record_metadata *rec_md = record->metadata[md_field_id];
/* Test whether any of the given limit values matches any entry of a
 * metadata list.
 *
 * service  supplies the charset factory used for text normalisation
 * ser_md   configuration of the field; its type selects the comparison
 * rec_md0  head of the record's metadata list for that field
 * values   candidate values to look for
 * num_v    number of entries in values
 *
 * Returns 1 if the loop was left while i < num_v, otherwise 0.
 * NOTE(review): the break statements, the increment of i and the reset of
 * rec_md / val_wr between values are not visible in this view; presumably
 * a match breaks out early and a miss moves on to the next value --
 * confirm.
 */
1873 static int match_metadata_local(struct conf_service *service,
1874 struct conf_metadata *ser_md,
1875 struct record_metadata *rec_md0,
1876 char **values, int num_v)
1879 struct record_metadata *rec_md = rec_md0;
1881 WRBUF text_wr = wrbuf_alloc();
/* i is not advanced in the loop header. */
1882 for (i = 0; i < num_v; )
/* Year/date fields: the value is read with atoi() and tested against the
   stored [min, max] range. */
1886 if (ser_md->type == Metadata_type_year
1887 || ser_md->type == Metadata_type_date)
1889 int y = atoi(values[i]);
1890 if (y >= rec_md->data.number.min
1891 && y <= rec_md->data.number.max)
/* Other fields: the limit value and the display text are both normalised
   with the mergekey rules and compared as strings. */
1898 val_wr = wrbuf_alloc();
1899 mergekey_norm_wr(service->charsets, val_wr, values[i]);
1901 wrbuf_rewind(text_wr);
1902 mergekey_norm_wr(service->charsets, text_wr,
1903 rec_md->data.text.disp);
1904 if (!strcmp(wrbuf_cstr(val_wr), wrbuf_cstr(text_wr)))
1907 rec_md = rec_md->next;
/* NOTE(review): val_wr is destroyed here and again after the loop;
   presumably it is reset to 0 in between (not visible) -- confirm. */
1912 wrbuf_destroy(val_wr);
1917 wrbuf_destroy(val_wr);
1918 wrbuf_destroy(text_wr);
1919 return i < num_v ? 1 : 0;
/* Check a record cluster against the session's facet limits, considering
 * only metadata fields that have limitcluster set.
 *
 * For each limit returned by facet_limits_get() whose name equals such a
 * field, the limit value is split on "|" and the alternatives are matched
 * against the cluster's metadata with match_metadata_local().
 *
 * NOTE(review): the result variable and the return statement are not
 * visible in this view, so the meaning of the return value cannot be
 * stated from here.
 */
1922 int session_check_cluster_limit(struct session *se, struct record_cluster *rec)
1925 struct conf_service *service = se->service;
/* Scratch pool for the split limit values; destroyed before returning. */
1929 NMEM nmem_tmp = nmem_create();
1931 for (i = 0; (name = facet_limits_get(se->facet_limits, i, &value)); i++)
1934 for (j = 0; j < service->num_metadata; j++)
1936 struct conf_metadata *md = service->metadata + j;
1937 if (!strcmp(md->name, name) && md->limitcluster)
1942 conf_service_metadata_field_id(service,
1945 if (md_field_id < 0)
1951 nmem_strsplit_escape2(nmem_tmp, "|", value, &values,
1954 if (!match_metadata_local(service,
1955 &service->metadata[md_field_id],
1956 rec->metadata[md_field_id],
1965 nmem_destroy(nmem_tmp);
1969 // Skip record on non-zero
/* Apply the client's locally evaluated facet limits to one record.
 *
 * Limits are fetched one at a time with client_get_facet_limit_local()
 * until skip_record becomes non-zero.  A limit named "*" is tried against
 * every metadata field of the record; any other name is resolved to a
 * single field with conf_service_metadata_field_id().
 *
 * NOTE(review): the loop exit when no limits remain, the assignments to
 * skip_record and the return statement are not visible in this view;
 * presumably a limit that matches nothing sets skip_record -- confirm.
 */
1970 static int check_limit_local(struct client *cl,
1971 struct record *record,
1974 int skip_record = 0;
1975 struct session *se = client_get_session(cl);
1976 struct conf_service *service = se->service;
/* Scratch pool handed to client_get_facet_limit_local(); destroyed at the
   end. */
1977 NMEM nmem_tmp = nmem_create();
1978 struct session_database *sdb = client_get_database(cl);
1980 while (!skip_record)
1986 client_get_facet_limit_local(cl, sdb, &l, nmem_tmp,
/* Wildcard limit: try each metadata field in turn. */
1991 if (!strcmp(name, "*"))
1993 for (md_field_id = 0; md_field_id < service->num_metadata;
1996 if (match_metadata_local(
1998 &service->metadata[md_field_id],
1999 record->metadata[md_field_id],
/* Reaching num_metadata means the loop ran through every field. */
2003 if (md_field_id == service->num_metadata)
/* Named limit: look the field up by name. */
2008 md_field_id = conf_service_metadata_field_id(service, name);
2009 if (md_field_id < 0)
2014 if (!match_metadata_local(
2016 &service->metadata[md_field_id],
2017 record->metadata[md_field_id],
2024 nmem_destroy(nmem_tmp);
/* Turn one normalised record element into a record, insert it into the
 * session's record list and merge its metadata into the resulting cluster.
 *
 * Outline of the visible code:
 *   1. First pass over root's children: build the record's own (non-merged)
 *      metadata lists, warn about unknown metadata types and attach
 *      snippets to generic text fields.
 *   2. check_limit_local() is consulted; then reclist_insert() yields the
 *      cluster the record belongs to.
 *   3. An optional term factor is derived from the hit count and maxrecs.
 *   4. Second pass over root's children: merge each metadata value into the
 *      cluster according to the field's merge policy, maintain sort keys,
 *      feed the relevance counter and add facet terms.
 *   5. The scratch pool is destroyed, relevance_donerecord() is called and
 *      se->total_records is incremented.
 *
 * NOTE(review): several parameters of the signature, most return
 * statements, the null checks after xmlGetProp()/record_metadata_init()
 * and all xmlFree()/xfree() calls are not visible in this view.
 */
2028 static int ingest_to_cluster(struct client *cl,
2032 struct record_metadata_attr *merge_keys)
2037 struct session *se = client_get_session(cl);
2038 struct conf_service *service = se->service;
2039 int term_factor = 1;
2040 struct record_cluster *cluster;
2041 struct record_metadata **metadata0;
2042 struct session_database *sdb = client_get_database(cl);
2043 NMEM ingest_nmem = 0;
2044 char **rank_values = 0;
2046 struct record *record = record_create(se->nmem,
2047 service->num_metadata,
2048 service->num_sortkeys, cl,
/* Pass 1: collect the record's own, non-merged metadata. */
2051 for (n = root->children; n; n = n->next)
2059 if (n->type != XML_ELEMENT_NODE)
2061 if (!strcmp((const char *) n->name, "metadata"))
2063 struct conf_metadata *ser_md = 0;
2064 struct record_metadata **wheretoput = 0;
2065 struct record_metadata *rec_md = 0;
2066 int md_field_id = -1;
2068 type = xmlGetProp(n, (xmlChar *) "type");
2069 value = xmlNodeListGetString(xdoc, n->children, 1);
/* For a missing or empty value the "empty" attribute is fetched; what is
   done with it is not visible here. */
2072 if (!value || !*value)
2074 xmlChar *empty = xmlGetProp(n, (xmlChar *) "empty");
2082 = conf_service_metadata_field_id(service, (const char *) type);
2083 if (md_field_id < 0)
/* Only the first unknown metadata element of a session is logged; every
   occurrence is counted. */
2085 if (se->number_of_warnings_unknown_metadata == 0)
2087 session_log(se, YLOG_WARN,
2088 "Ignoring unknown metadata element: %s", type);
2090 se->number_of_warnings_unknown_metadata++;
2094 ser_md = &service->metadata[md_field_id];
2096 // non-merged metadata
2097 rec_md = record_metadata_init(se->nmem, (const char *) value,
2098 ser_md->type, n->properties);
2101 session_log(se, YLOG_WARN, "bad metadata data '%s' "
2102 "for element '%s'", value, type);
/* Generic text fields may receive a snippet from relevance_snippet(). */
2106 if (ser_md->type == Metadata_type_generic)
2108 WRBUF w = wrbuf_alloc();
2109 if (relevance_snippet(se->relevance,
2110 (char*) value, ser_md->name, w))
2111 rec_md->data.text.snippet = nmem_strdup(se->nmem,
/* Walk to the end of this field's list and append rec_md there (the loop
   header is not visible). */
2117 wheretoput = &record->metadata[md_field_id];
2119 wheretoput = &(*wheretoput)->next;
2120 *wheretoput = rec_md;
/* A non-zero result from check_limit_local() means the record is to be
   skipped. */
2124 if (check_limit_local(cl, record, record_no))
/* Insert the record into the session's record list using the merge keys;
   the address of se->total_merged is passed along. */
2132 cluster = reclist_insert(se->reclist, se->relevance, service, record,
2133 merge_keys, &se->total_merged);
2140 return 0; // complete match with existing record
/* Optional term factor, enabled when the PZ_TERMLIST_TERM_FACTOR setting
   starts with '1': MAX(hits, maxrecs) / MAX(1, maxrecs) using integer
   division, so it is never below 1.  It is passed to add_facet() below.
   NOTE(review): the (int) cast narrows the hit count returned by
   client_get_hits() -- confirm very large hit counts cannot occur. */
2144 const char *use_term_factor_str =
2145 session_setting_oneval(sdb, PZ_TERMLIST_TERM_FACTOR);
2146 if (use_term_factor_str && use_term_factor_str[0] == '1')
2148 int maxrecs = client_get_maxrecs(cl);
2149 int hits = (int) client_get_hits(cl);
2150 term_factor = MAX(hits, maxrecs) / MAX(1, maxrecs);
2151 assert(term_factor >= 1);
2152 session_log(se, YLOG_DEBUG, "Using term factor: %d (%d / %d)",
2153 term_factor, MAX(hits, maxrecs), MAX(1, maxrecs));
2157 if (global_parameters.dump_records)
2158 session_log(se, YLOG_LOG, "Cluster id %s from %s (#%d)", cluster->recid,
2159 sdb->database->id, record_no);
/* NOTE(review): metadata0 is allocated with xmalloc(); the matching
   release is not visible in this view -- confirm it is freed on every
   path. */
2161 // original metadata, to check if first existence of a field
2162 metadata0 = xmalloc(sizeof(*metadata0) * service->num_metadata);
2163 memcpy(metadata0, cluster->metadata,
2164 sizeof(*metadata0) * service->num_metadata);
/* Scratch pool for the split rank specification; destroyed at the end. */
2166 ingest_nmem = nmem_create();
2169 yaz_log(YLOG_LOG, "local in sort : %s", se->rank);
2170 nmem_strsplit_escape2(ingest_nmem, ",", se->rank, &rank_values,
2171 &rank_num, 1, '\\', 1);
2174 // now parsing XML record and adding data to cluster or record metadata
2175 for (n = root->children; n; n = n->next)
2183 if (n->type != XML_ELEMENT_NODE)
2185 if (!strcmp((const char *) n->name, "metadata"))
2187 struct conf_metadata *ser_md = 0;
2188 struct conf_sortkey *ser_sk = 0;
2189 struct record_metadata **wheretoput = 0;
2190 struct record_metadata *rec_md = 0;
2191 int md_field_id = -1;
2192 int sk_field_id = -1;
2193 const char *rank = 0;
2194 xmlChar *xml_rank = 0;
2196 type = xmlGetProp(n, (xmlChar *) "type");
2197 value = xmlNodeListGetString(xdoc, n->children, 1);
2199 if (!type || !value || !*value)
2203 = conf_service_metadata_field_id(service, (const char *) type);
2204 if (md_field_id < 0)
2207 ser_md = &service->metadata[md_field_id];
/* A non-negative sortkey_offset links the field to a sort key slot. */
2209 if (ser_md->sortkey_offset >= 0)
2211 sk_field_id = ser_md->sortkey_offset;
2212 ser_sk = &service->sortkeys[sk_field_id];
2216 rec_md = record_metadata_init(se->nmem, (const char *) value,
2219 // see if the field was not in cluster already (from beginning)
/* Search the split rank specification for an entry of the form
   "<type>=..." whose name part equals this element's type. */
2227 for (i = 0; i < rank_num; i++)
2229 const char *val = rank_values[i];
2230 const char *cp = strchr(val, '=');
2233 if ((cp - val) == strlen((const char *) type)
2234 && !memcmp(val, type, cp - val))
/* Rank taken from the element's "rank" attribute, falling back to the
   rank configured for the field. */
2243 xml_rank = xmlGetProp(n, (xmlChar *) "rank");
2244 rank = xml_rank ? (const char *) xml_rank : ser_md->rank;
2247 wheretoput = &cluster->metadata[md_field_id];
/* Merge policy "first": metadata0 (the snapshot taken before this record)
   is consulted, so values are added only when the field was empty then. */
2249 if (ser_md->merge == Metadata_merge_first)
2251 if (!metadata0[md_field_id])
2254 wheretoput = &(*wheretoput)->next;
2255 *wheretoput = rec_md;
/* Merge policy "unique": existing entries are compared by display text.
   NOTE(review): presumably an equal entry stops the walk so no duplicate
   is added; the loop header and break are not visible -- confirm. */
2258 else if (ser_md->merge == Metadata_merge_unique)
2262 if (!strcmp((const char *) (*wheretoput)->data.text.disp,
2263 rec_md->data.text.disp))
2265 wheretoput = &(*wheretoput)->next;
2268 *wheretoput = rec_md;
/* Merge policy "longest": the stored value is replaced when the new
   display text is longer (the first half of the condition is not
   visible); the text sort key is rebuilt as well. */
2270 else if (ser_md->merge == Metadata_merge_longest)
2273 || strlen(rec_md->data.text.disp)
2274 > strlen((*wheretoput)->data.text.disp))
2276 *wheretoput = rec_md;
2279 pp2_charset_token_t prt;
2280 const char *sort_str = 0;
2282 ser_sk->type == Metadata_type_skiparticle;
2284 if (!cluster->sortkeys[sk_field_id])
2285 cluster->sortkeys[sk_field_id] =
2286 nmem_malloc(se->nmem,
2287 sizeof(union data_types));
/* The sort form is produced by the "sort" charset profile. */
2290 pp2_charset_token_create(service->charsets, "sort");
2292 pp2_charset_token_first(prt, rec_md->data.text.disp,
2295 pp2_charset_token_next(prt);
2297 sort_str = pp2_get_sort(prt);
2299 cluster->sortkeys[sk_field_id]->text.disp =
2300 rec_md->data.text.disp;
/* Fallback when no sort string is available: use the display text and
   warn. */
2303 sort_str = rec_md->data.text.disp;
2304 session_log(se, YLOG_WARN,
2305 "Could not make sortkey. Bug #1858");
2307 cluster->sortkeys[sk_field_id]->text.sort =
2308 nmem_strdup(se->nmem, sort_str);
2309 pp2_charset_token_destroy(prt);
/* Merge policy "all": every value is appended to the list. */
2313 else if (ser_md->merge == Metadata_merge_all)
2316 wheretoput = &(*wheretoput)->next;
2317 *wheretoput = rec_md;
/* Merge policy "range": the first value is stored as is; later values
   widen the stored [min, max] interval. */
2319 else if (ser_md->merge == Metadata_merge_range)
2323 *wheretoput = rec_md;
2325 cluster->sortkeys[sk_field_id]
2330 int this_min = rec_md->data.number.min;
2331 int this_max = rec_md->data.number.max;
2332 if (this_min < (*wheretoput)->data.number.min)
2333 (*wheretoput)->data.number.min = this_min;
2334 if (this_max > (*wheretoput)->data.number.max)
2335 (*wheretoput)->data.number.max = this_max;
2339 // ranking of _all_ fields enabled ...
2342 relevance_countwords(se->relevance, cluster,
2343 (char *) value, rank, ser_md->name);
2345 // construct facets ... unless the client already has reported them
2346 if (ser_md->termlist && !client_has_facet(cl, (char *) type))
/* Year facets: the upper bound is added, and the lower bound too when the
   two differ.
   NOTE(review): the declaration of `year` is not visible and sprintf() is
   unbounded -- confirm the buffer can hold any int. */
2348 if (ser_md->type == Metadata_type_year)
2351 sprintf(year, "%d", rec_md->data.number.max);
2353 add_facet(se, (char *) type, year, term_factor);
2354 if (rec_md->data.number.max != rec_md->data.number.min)
2356 sprintf(year, "%d", rec_md->data.number.min);
2357 add_facet(se, (char *) type, year, term_factor);
2361 add_facet(se, (char *) type, (char *) value, term_factor);
/* Elements other than "metadata": warn for the first one in the session
   and count every occurrence. */
2373 if (se->number_of_warnings_unknown_elements == 0)
2374 session_log(se, YLOG_WARN,
2375 "Unexpected element in internal record: %s", n->name);
2376 se->number_of_warnings_unknown_elements++;
2384 nmem_destroy(ingest_nmem);
2386 relevance_donerecord(se->relevance, cluster);
2387 se->total_records++;
/* printf-style logging that prefixes the message with "Session <id>: ".
 *
 * s      session the message belongs to; a null pointer is logged as id 0
 * level  yaz_log level
 * fmt    printf-style format followed by its arguments
 *
 * The message is formatted into a local buffer with yaz_vsnprintf(),
 * limited to sizeof(buf)-30 bytes.
 * NOTE(review): the buffer declaration and the va_start/va_end calls are
 * not visible in this view; the 30 bytes held back are presumably headroom
 * for the prefix -- confirm.
 */
2392 void session_log(struct session *s, int level, const char *fmt, ...)
2398 yaz_vsnprintf(buf, sizeof(buf)-30, fmt, ap);
2399 yaz_log(level, "Session %u: %s", s ? s->session_id : 0, buf);
2407 * c-file-style: "Stroustrup"
2408 * indent-tabs-mode: nil
2410 * vim: shiftwidth=4 tabstop=8 expandtab