1 /* $Id: alvis.c,v 1.17 2007-04-16 21:54:37 adam Exp $
2 Copyright (C) 1995-2007
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
27 #include <yaz/diagbib1.h>
28 #include <yaz/tpath.h>
29 #include <yaz/oid_db.h>
31 #include <libxml/xmlversion.h>
32 #include <libxml/parser.h>
33 #include <libxml/tree.h>
34 #include <libxml/xmlIO.h>
35 #include <libxml/xmlreader.h>
36 #include <libxslt/transform.h>
37 #include <libxslt/xsltutils.h>
40 #include <libexslt/exslt.h>
43 #include <idzebra/util.h>
44 #include <idzebra/recctrl.h>
/* One node of the schema list that create_schemas() builds from the
 * attributes of each "schema" element in the config file. */
46 struct filter_schema {
48 const char *identifier; /* "identifier" attribute; compared in lookup_schema() */
49 const char *stylesheet; /* "stylesheet" attribute: stylesheet file name */
50 struct filter_schema *next; /* next node; new nodes are pushed at the list head */
51 const char *default_schema; /* "default" attribute; tested for non-NULL in lookup_schema() */
52 /* char default_schema; */
53 const char *include_snippet; /* "snippet" attribute; read with atoi() as a window size */
54 xsltStylesheetPtr stylesheet_xsp; /* parsed stylesheet, 0 until one is loaded */
/* Members of struct filter_info, the per-filter state allocated in
 * filter_init(). */
61 const char *profile_path; /* "profilePath" resource value, set in filter_config() */
63 const char *split_path; /* initialised to 0 in filter_init() */
65 struct filter_schema *schemas; /* head of the schema list */
66 xmlTextReaderPtr reader; /* reader used by extract_split() to walk the input */
/* Namespace URI that index_node() and index_record() require on the
 * elements they act upon. */
69 #define ZEBRA_SCHEMA_XSLT_NS "http://indexdata.dk/zebra/xslt/1"
/* strcmp()/strlen() wrappers that cast their first argument to char *;
 * used in this file on libxml2 node names, namespace hrefs and content. */
71 #define XML_STRCMP(a,b) strcmp((char*)a, b)
72 #define XML_STRLEN(a) strlen((char*)a)
/* The same URI as a C string, for comparisons and as a parameter value. */
74 static const char *zebra_xslt_ns = ZEBRA_SCHEMA_XSLT_NS;
/* Helper that takes a parameter array, a parameter name and a value;
 * filter_retrieve() uses it for the "snippet" parameter.
 * NOTE(review): presumably stores the value without adding quotes, unlike
 * set_param_str() -- confirm against the function body. */
76 static void set_param_xml(const char **params, const char *name,
77 const char *value, ODR odr)
/* Builds the quoted form of a string parameter value: the text is copied
 * into memory taken from odr and wrapped in single quotes. */
86 static void set_param_str(const char **params, const char *name,
87 const char *value, ODR odr)
/* 3 extra bytes: the two quote characters plus the terminating NUL */
89 char *quoted = odr_malloc(odr, 3 + strlen(value));
/* NOTE(review): a single quote inside value is not escaped here */
90 sprintf(quoted, "'%s'", value);
/* Builds the quoted form of an integer parameter value in memory taken
 * from an ODR handle. */
98 static void set_param_int(const char **params, const char *name,
101 char *quoted = odr_malloc(odr, 30); /* 25 digits enough for 2^64 */
/* the number is printed with ZINT_FORMAT between single quotes */
104 sprintf(quoted, "'" ZINT_FORMAT "'", value);
/* Compile-time switch: with the value 0 the custom libxml2 input callbacks
 * guarded by it are compiled out. */
110 #define ENABLE_INPUT_CALLBACK 0
112 #if ENABLE_INPUT_CALLBACK
/* Match callback: logs the file name it is asked about. */
113 static int zebra_xmlInputMatchCallback (char const *filename)
115 yaz_log(YLOG_LOG, "match %s", filename);
/* Open, read and close callbacks; filter_init() registers them together
 * with the match callback through xmlRegisterInputCallbacks(). */
119 static void * zebra_xmlInputOpenCallback (char const *filename)
124 static int zebra_xmlInputReadCallback (void * context, char * buffer, int len)
129 static int zebra_xmlInputCloseCallback (void * context)
/* Creates the per-filter state: allocates a struct filter_info, clears its
 * name/path fields and split level, and creates an ODR memory handle. */
135 static void *filter_init(Res res, RecType recType)
137 struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
140 tinfo->full_name = 0;
141 tinfo->profile_path = 0;
142 tinfo->split_level = 0; /* 0 makes filter_extract() use extract_full() */
143 tinfo->split_path = 0;
144 tinfo->odr = odr_createmem(ODR_ENCODE);
/* only compiled when ENABLE_INPUT_CALLBACK is non-zero */
152 #if ENABLE_INPUT_CALLBACK
153 xmlRegisterDefaultInputCallbacks();
154 xmlRegisterInputCallbacks(zebra_xmlInputMatchCallback,
155 zebra_xmlInputOpenCallback,
156 zebra_xmlInputReadCallback,
157 zebra_xmlInputCloseCallback);
/* If attr is named `name` and its first child is a text node, stores a
 * pointer to that text in *dst_content. The pointer refers to the
 * attribute's own content; no copy is made. */
162 static int attr_content(struct _xmlAttr *attr, const char *name,
163 const char **dst_content)
165 if (!XML_STRCMP(attr->name, name) && attr->children
166 && attr->children->type == XML_TEXT_NODE)
168 *dst_content = (const char *)(attr->children->content);
/* Walks the schema list freeing each parsed stylesheet, then frees the
 * parsed configuration document. */
174 static void destroy_schemas(struct filter_info *tinfo)
176 struct filter_schema *schema = tinfo->schemas;
/* the successor is read first, before the current node is dealt with */
179 struct filter_schema *schema_next = schema->next;
180 if (schema->stylesheet_xsp)
181 xsltFreeStylesheet(schema->stylesheet_xsp);
183 schema = schema_next;
/* NOTE(review): the schema strings point into this document (see
 * attr_content()), so it has to outlive the list -- confirm ordering */
188 xmlFreeDoc(tinfo->doc);
/* Loads the filter configuration file fname: resolves it against the
 * profile path, parses it as XML, checks for a schemaInfo root element and
 * builds the schema list and split level from the root's element children. */
192 static ZEBRA_RES create_schemas(struct filter_info *tinfo, const char *fname)
194 char tmp_full_name[1024];
196 tinfo->fname = xstrdup(fname);
/* full_name is the resolved path when resolution succeeds; the second
 * assignment copies the name as given (presumably the else branch) */
198 if (yaz_filepath_resolve(tinfo->fname, tinfo->profile_path,
199 NULL, tmp_full_name))
200 tinfo->full_name = xstrdup(tmp_full_name);
202 tinfo->full_name = xstrdup(tinfo->fname);
204 yaz_log(YLOG_LOG, "alvis filter: loading config file %s", tinfo->full_name);
206 tinfo->doc = xmlParseFile(tinfo->full_name);
210 yaz_log(YLOG_WARN, "alvis filter: could not parse config file %s",
/* the root must be an element named schemaInfo */
216 ptr = xmlDocGetRootElement(tinfo->doc);
217 if (!ptr || ptr->type != XML_ELEMENT_NODE
218 || XML_STRCMP(ptr->name, "schemaInfo"))
221 "alvis filter: config file %s :"
222 " expected root element <schemaInfo>",
/* element children are handled by name: "schema", "split", or a warning */
227 for (ptr = ptr->children; ptr; ptr = ptr->next)
229 if (ptr->type != XML_ELEMENT_NODE)
231 if (!XML_STRCMP(ptr->name, "schema"))
233 struct _xmlAttr *attr;
234 struct filter_schema *schema = xmalloc(sizeof(*schema));
/* clear the fields and push the new node onto the head of the list */
236 schema->identifier = 0;
237 schema->stylesheet = 0;
238 schema->default_schema = 0;
239 schema->next = tinfo->schemas;
240 schema->stylesheet_xsp = 0;
241 schema->include_snippet = 0;
242 tinfo->schemas = schema;
/* attr_content() only stores a value when the attribute name matches */
243 for (attr = ptr->properties; attr; attr = attr->next)
245 attr_content(attr, "identifier", &schema->identifier);
246 attr_content(attr, "name", &schema->name);
247 attr_content(attr, "stylesheet", &schema->stylesheet);
248 attr_content(attr, "default", &schema->default_schema);
249 attr_content(attr, "snippet", &schema->include_snippet);
251 /*yaz_log(YLOG_LOG, "XSLT add %s %s %s",
252 schema->name, schema->identifier, schema->stylesheet); */
254 /* find requested schema */
/* resolve the stylesheet file against the profile path, then parse it */
256 if (schema->stylesheet)
258 char tmp_xslt_full_name[1024];
259 if (!yaz_filepath_resolve(schema->stylesheet, tinfo->profile_path,
260 NULL, tmp_xslt_full_name))
263 "alvis filter: stylesheet %s not found in path %s",
264 schema->stylesheet, tinfo->profile_path);
267 schema->stylesheet_xsp
268 = xsltParseStylesheetFile((const xmlChar*) tmp_xslt_full_name);
269 if (!schema->stylesheet_xsp)
272 "alvis filter: could not parse xslt stylesheet %s",
278 else if (!XML_STRCMP(ptr->name, "split"))
280 struct _xmlAttr *attr;
281 for (attr = ptr->properties; attr; attr = attr->next)
/* the "level" text is converted with atoi(); without it the result is 0.
 * NOTE(review): split_level_str is reset for every attribute, so if the
 * result is assigned inside this loop a later attribute other than
 * "level" would overwrite the level with 0 -- confirm */
283 const char *split_level_str = 0;
284 attr_content(attr, "level", &split_level_str);
286 split_level_str ? atoi(split_level_str) : 0;
291 yaz_log(YLOG_WARN, "Bad element %s in %s", ptr->name, fname);
/* Picks a schema from tinfo's list for the requested name est: entries are
 * compared by identifier and by name, an entry with default_schema set is
 * also considered, and the list head is the final fallback (per the
 * comments in the body). */
298 static struct filter_schema *lookup_schema(struct filter_info *tinfo,
301 struct filter_schema *schema;
303 for (schema = tinfo->schemas; schema; schema = schema->next)
305 /* find requested schema */
308 if (schema->identifier && !strcmp(schema->identifier, est))
311 if (schema->name && !strcmp(schema->name, est))
314 /* or return default schema if defined */
315 else if (schema->default_schema)
319 /* return first schema if no default schema defined */
321 return tinfo->schemas;
/* Configures the filter from args, the name of the config file: reads the
 * "profilePath" resource, drops any previously loaded schemas and loads
 * the schemas from the named file. */
326 static ZEBRA_RES filter_config(void *clientData, Res res, const char *args)
328 struct filter_info *tinfo = clientData;
331 yaz_log(YLOG_WARN, "alvis filter: need config file");
/* args names the file recorded in tinfo->fname by a previous load */
335 if (tinfo->fname && !strcmp(args, tinfo->fname))
338 tinfo->profile_path = res_get(res, "profilePath");
339 yaz_log(YLOG_LOG, "alvis filter: profilePath %s", tinfo->profile_path);
341 destroy_schemas(tinfo);
342 return create_schemas(tinfo, args);
/* Tears down the filter state: schemas, the stored full file name, the
 * text reader and the ODR handle. */
345 static void filter_destroy(void *clientData)
347 struct filter_info *tinfo = clientData;
348 destroy_schemas(tinfo);
349 xfree(tinfo->full_name);
351 xmlFreeTextReader(tinfo->reader);
352 odr_destroy(tinfo->odr);
/* Read callback for libxml2 during extraction: context is the
 * struct recExtractCtrl; the request is forwarded to its stream's readf
 * and that call's result is returned. */
356 static int ioread_ex(void *context, char *buffer, int len)
358 struct recExtractCtrl *p = context;
359 return p->stream->readf(p->stream, buffer, len);
/* Close callback passed alongside ioread_ex to xmlReaderForIO() and
 * xmlReadIO(). */
362 static int ioclose_ex(void *context)
/* Walks ptr and its following siblings, recursing into children first;
 * the content of each text node is passed to ctrl->tokenAdd through
 * recWord's term_buf/term_len. */
367 static void index_cdata(struct filter_info *tinfo, struct recExtractCtrl *ctrl,
368 xmlNodePtr ptr, RecWord *recWord)
370 for(; ptr; ptr = ptr->next)
372 index_cdata(tinfo, ctrl, ptr->children, recWord);
373 if (ptr->type != XML_TEXT_NODE)
/* term_buf points at the node's own content; nothing is copied */
375 recWord->term_buf = (const char *)ptr->content;
376 recWord->term_len = XML_STRLEN(ptr->content);
377 (*ctrl->tokenAdd)(recWord);
/* Walks ptr and its following siblings, recursing into children first.
 * For an element named "index" the "name", "xpath" and "type" attributes
 * are read and the text below the element is indexed via index_cdata()
 * under that index name. */
381 static void index_node(struct filter_info *tinfo, struct recExtractCtrl *ctrl,
382 xmlNodePtr ptr, RecWord *recWord)
384 for(; ptr; ptr = ptr->next)
386 index_node(tinfo, ctrl, ptr->children, recWord);
/* tests for: not an element, no namespace, or a namespace other than
 * zebra_xslt_ns */
387 if (ptr->type != XML_ELEMENT_NODE || !ptr->ns ||
388 XML_STRCMP(ptr->ns->href, zebra_xslt_ns))
390 if (!XML_STRCMP(ptr->name, "index"))
392 const char *name_str = 0;
393 const char *type_str = 0;
394 const char *xpath_str = 0;
395 struct _xmlAttr *attr;
396 for (attr = ptr->properties; attr; attr = attr->next)
398 attr_content(attr, "name", &name_str);
399 attr_content(attr, "xpath", &xpath_str);
400 attr_content(attr, "type", &type_str);
404 int prev_type = recWord->index_type; /* save default type */
/* only the first character of the type attribute is used */
406 if (type_str && *type_str)
407 recWord->index_type = *type_str; /* type was given */
408 recWord->index_name = name_str;
409 index_cdata(tinfo, ctrl, ptr->children, recWord);
411 recWord->index_type = prev_type; /* restore it again */
/* Handles the root of a transformed record. When it is a "record" element
 * in the zebra_xslt_ns namespace its "type", "id" and "rank" attributes
 * are read; the record is then dispatched on its type, which is "update"
 * unless a "type" attribute says otherwise. */
417 static void index_record(struct filter_info *tinfo,struct recExtractCtrl *ctrl,
418 xmlNodePtr ptr, RecWord *recWord)
420 const char *type_str = "update";
422 if (ptr && ptr->type == XML_ELEMENT_NODE && ptr->ns &&
423 !XML_STRCMP(ptr->ns->href, zebra_xslt_ns)
424 && !XML_STRCMP(ptr->name, "record"))
426 const char *id_str = 0;
427 const char *rank_str = 0;
428 struct _xmlAttr *attr;
429 for (attr = ptr->properties; attr; attr = attr->next)
431 attr_content(attr, "type", &type_str);
432 attr_content(attr, "id", &id_str);
433 attr_content(attr, "rank", &rank_str);
/* %255s stores at most 255 characters plus a NUL and stops at whitespace.
 * NOTE(review): assumes match_criteria holds at least 256 bytes -- confirm */
436 sscanf(id_str, "%255s", ctrl->match_criteria);
439 ctrl->staticrank = atozint(rank_str);
/* "update" indexes the tree; "delete" is only logged as unimplemented */
443 if (!strcmp("update", type_str))
444 index_node(tinfo, ctrl, ptr, recWord);
445 else if (!strcmp("delete", type_str))
446 yaz_log(YLOG_WARN, "alvis filter delete: to be implemented");
448 yaz_log(YLOG_WARN, "alvis filter: unknown record type '%s'",
/* Indexes and stores one parsed XML document: looks up the schema for the
 * zebra_xslt_ns name, applies its stylesheet when one is loaded, indexes
 * the result through index_record(), and hands the serialized input
 * document to p->setStoreData. Returns RECCTRL_EXTRACT_OK. */
452 static int extract_doc(struct filter_info *tinfo, struct recExtractCtrl *p,
456 const char *params[10];
460 struct filter_schema *schema = lookup_schema(tinfo, zebra_xslt_ns);
463 set_param_str(params, "schema", zebra_xslt_ns, tinfo->odr);
465 (*p->init)(p, &recWord);
467 if (schema && schema->stylesheet_xsp)
471 xsltApplyStylesheet(schema->stylesheet_xsp,
/* with flagShowRecords set the transformed document is dumped to stdout */
473 if (p->flagShowRecords)
475 xmlDocDumpMemory(resDoc, &buf_out, &len_out);
476 fwrite(buf_out, len_out, 1, stdout);
479 root_ptr = xmlDocGetRootElement(resDoc);
481 index_record(tinfo, p, root_ptr, &recWord);
484 yaz_log(YLOG_WARN, "No root for index XML record."
485 " split_level=%d stylesheet=%s",
486 tinfo->split_level, schema->stylesheet);
/* the document passed in, not the transformed one, is what gets stored */
490 xmlDocDumpMemory(doc, &buf_out, &len_out);
491 if (p->flagShowRecords)
492 fwrite(buf_out, len_out, 1, stdout);
493 (*p->setStoreData)(p, buf_out, len_out);
497 return RECCTRL_EXTRACT_OK;
/* Extracts records from a stream in split mode: reads nodes with an
 * xmlTextReader and, for an element whose depth equals tinfo->split_level,
 * copies the expanded subtree into a new document and passes it to
 * extract_doc(). Returns RECCTRL_EXTRACT_EOF at the end of the function
 * and RECCTRL_EXTRACT_ERROR_GENERIC on the error paths. */
500 static int extract_split(struct filter_info *tinfo, struct recExtractCtrl *p)
507 xmlFreeTextReader(tinfo->reader);
508 tinfo->reader = xmlReaderForIO(ioread_ex, ioclose_ex,
517 return RECCTRL_EXTRACT_ERROR_GENERIC;
519 ret = xmlTextReaderRead(tinfo->reader);
522 int type = xmlTextReaderNodeType(tinfo->reader);
523 int depth = xmlTextReaderDepth(tinfo->reader);
524 if (type == XML_READER_TYPE_ELEMENT && tinfo->split_level == depth)
526 xmlNodePtr ptr = xmlTextReaderExpand(tinfo->reader);
/* second argument 1 requests a recursive copy; the copy becomes the root
 * of a fresh document */
529 xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
530 xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0");
532 xmlDocSetRootElement(doc, ptr2);
534 return extract_doc(tinfo, p, doc);
538 xmlFreeTextReader(tinfo->reader);
540 return RECCTRL_EXTRACT_ERROR_GENERIC;
543 ret = xmlTextReaderRead(tinfo->reader);
545 xmlFreeTextReader(tinfo->reader);
547 return RECCTRL_EXTRACT_EOF;
/* Extracts a whole stream as a single record: on the first call for a
 * stream the input is parsed with xmlReadIO() and passed to extract_doc();
 * the function otherwise ends by returning RECCTRL_EXTRACT_EOF. */
550 static int extract_full(struct filter_info *tinfo, struct recExtractCtrl *p)
552 if (p->first_record) /* only one record per stream */
554 xmlDocPtr doc = xmlReadIO(ioread_ex, ioclose_ex, p /* I/O handler */,
561 return RECCTRL_EXTRACT_ERROR_GENERIC;
563 xmlNodePtr root = xmlDocGetRootElement(doc);
565 return RECCTRL_EXTRACT_ERROR_GENERIC;
568 return extract_doc(tinfo, p, doc);
571 return RECCTRL_EXTRACT_EOF;
/* Extraction entry point: resets the filter's ODR memory, then extracts
 * the whole stream as one record when split_level is 0 and otherwise
 * splits the stream at the configured depth. */
574 static int filter_extract(void *clientData, struct recExtractCtrl *p)
576 struct filter_info *tinfo = clientData;
578 odr_reset(tinfo->odr);
579 if (tinfo->split_level == 0)
580 return extract_full(tinfo, p);
582 return extract_split(tinfo, p);
/* Read callback for libxml2 during retrieval: context is the
 * struct recRetrieveCtrl; the request is forwarded to its stream's readf
 * and that call's result is returned. */
585 static int ioread_ret(void *context, char *buffer, int len)
587 struct recRetrieveCtrl *p = context;
588 return p->stream->readf(p->stream, buffer, len);
/* Close callback passed alongside ioread_ret to xmlReadIO(). */
591 static int ioclose_ret(void *context)
/* Builds a snippet string from p->doc_snippet and p->hit_snippet limited
 * by window_size and returns a copy allocated from p->odr. The body emits
 * either a single-quoted text form or a "snippet" XML element holding
 * "term" children; presumably text_mode selects between them -- confirm. */
596 static const char *snippet_doc(struct recRetrieveCtrl *p, int text_mode,
599 const char *xml_doc_str;
601 WRBUF wrbuf = wrbuf_alloc();
602 zebra_snippets *res =
603 zebra_snippets_window(p->doc_snippet, p->hit_snippet, window_size);
604 zebra_snippet_word *w = zebra_snippets_list(res);
607 wrbuf_printf(wrbuf, "\'");
609 wrbuf_printf(wrbuf, "<snippet xmlns='%s'>\n", zebra_xslt_ns);
610 for (; w; w = w->next)
614 else if (ord != w->ord)
/* text form: matching words get a "*" marker */
618 wrbuf_printf(wrbuf, "%s%s%s ",
621 w->match ? "*" : "");
/* XML form: the term text is written through wrbuf_xmlputs() */
624 wrbuf_printf(wrbuf, " <term ord='%d' seqno='" ZINT_FORMAT "' %s>",
626 (w->match ? "match='1'" : ""));
627 wrbuf_xmlputs(wrbuf, w->term);
628 wrbuf_printf(wrbuf, "</term>\n");
632 wrbuf_printf(wrbuf, "\'");
634 wrbuf_printf(wrbuf, "</snippet>\n");
/* the result is duplicated into p->odr before the work buffer and the
 * snippet list are destroyed */
636 xml_doc_str = odr_strdup(p->odr, wrbuf_cstr(wrbuf));
638 zebra_snippets_destroy(res);
639 wrbuf_destroy(wrbuf);
/* Retrieval entry point: determines the requested element set name, looks
 * up the matching schema, prepares stylesheet parameters (id, filename,
 * rank, schema, score, size, optional snippet), parses the stored record,
 * applies the schema's stylesheet and returns the result as XML or SUTRS
 * in p->rec_buf / p->rec_len. Failures are reported through p->diagnostic. */
643 static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
645 /* const char *esn = zebra_xslt_ns; */
647 const char *params[32];
648 struct filter_info *tinfo = clientData;
651 struct filter_schema *schema;
652 int window_size = -1;
/* the element set name comes from either a simple or a complex record
 * composition spec */
656 if (p->comp->which == Z_RecordComp_simple
657 && p->comp->u.simple->which == Z_ElementSetNames_generic)
659 esn = p->comp->u.simple->u.generic;
661 else if (p->comp->which == Z_RecordComp_complex
662 && p->comp->u.complex->generic->elementSpec
663 && p->comp->u.complex->generic->elementSpec->which ==
664 Z_ElementSpec_elementSetName)
666 esn = p->comp->u.complex->generic->elementSpec->u.elementSetName;
669 schema = lookup_schema(tinfo, esn);
673 YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
/* window_size stays -1 (snippets disabled) unless the schema has a
 * "snippet" attribute */
677 if (schema->include_snippet)
678 window_size = atoi(schema->include_snippet);
681 set_param_int(params, "id", p->localno, p->odr);
683 set_param_str(params, "filename", p->fname, p->odr);
684 if (p->staticrank >= 0)
685 set_param_int(params, "rank", p->staticrank, p->odr);
688 set_param_str(params, "schema", esn, p->odr);
691 set_param_str(params, "schema", schema->name, p->odr);
692 else if (schema->identifier)
693 set_param_str(params, "schema", schema->identifier, p->odr);
695 set_param_str(params, "schema", "", p->odr);
698 set_param_int(params, "score", p->score, p->odr);
699 set_param_int(params, "size", p->recordSize, p->odr);
/* text form of the snippet is passed as the "snippet" parameter */
701 if (window_size >= 0)
702 set_param_xml(params, "snippet", snippet_doc(p, 1, window_size),
704 doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */,
707 XML_PARSE_XINCLUDE | XML_PARSE_NOENT | XML_PARSE_NONET);
710 p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
/* XML form of the snippet is parsed and its root appended under the
 * record's root element.
 * NOTE(review): the local variable snippet_doc shadows the function of the
 * same name; the parsed document is not checked for NULL and no free of it
 * is visible -- confirm ownership after xmlAddChild() */
714 if (window_size >= 0)
716 xmlNodePtr node = xmlDocGetRootElement(doc);
717 const char *snippet_str = snippet_doc(p, 0, window_size);
718 xmlDocPtr snippet_doc = xmlParseMemory(snippet_str, strlen(snippet_str));
719 xmlAddChild(node, xmlDocGetRootElement(snippet_doc));
721 if (!schema->stylesheet_xsp)
725 resDoc = xsltApplyStylesheet(schema->stylesheet_xsp,
731 p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
/* XML output: chosen when no input format is given or it is the XML
 * record syntax */
733 else if (!p->input_format
734 || !oid_oidcmp(p->input_format, yaz_oid_recsyn_xml))
739 if (schema->stylesheet_xsp)
740 xsltSaveResultToString(&buf_out, &len_out, resDoc,
741 schema->stylesheet_xsp);
743 xmlDocDumpMemory(resDoc, &buf_out, &len_out);
/* the serialized result is copied into memory owned by p->odr */
745 p->output_format = yaz_oid_recsyn_xml;
746 p->rec_len = len_out;
747 p->rec_buf = odr_malloc(p->odr, p->rec_len);
748 memcpy(p->rec_buf, buf_out, p->rec_len);
/* NOTE(review): this branch tests p->output_format whereas the XML branch
 * tests p->input_format -- confirm which field is intended */
751 else if (!oid_oidcmp(p->output_format, yaz_oid_recsyn_sutrs))
756 if (schema->stylesheet_xsp)
757 xsltSaveResultToString(&buf_out, &len_out, resDoc,
758 schema->stylesheet_xsp);
760 xmlDocDumpMemory(resDoc, &buf_out, &len_out);
762 p->output_format = yaz_oid_recsyn_sutrs;
763 p->rec_len = len_out;
764 p->rec_buf = odr_malloc(p->odr, p->rec_len);
765 memcpy(p->rec_buf, buf_out, p->rec_len);
/* any other requested syntax is reported as unsupported */
771 p->diagnostic = YAZ_BIB1_RECORD_SYNTAX_UNSUPP;
/* Record type descriptor for this filter.
 * NOTE(review): presumably lists the filter_* entry points defined in this
 * file -- confirm against the initializer. */
777 static struct recType filter_type = {
/* NOTE(review): build-time switch; presumably selects how the filter is
 * registered (static vs. loadable) -- confirm */
788 #ifdef IDZEBRA_STATIC_ALVIS
801 * indent-tabs-mode: nil
803 * vim: shiftwidth=4 tabstop=8 expandtab