1 /* $Id: mod_dom.c,v 1.20 2007-02-23 14:59:12 adam Exp $
2 Copyright (C) 1995-2007
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
28 #include <yaz/diagbib1.h>
29 #include <yaz/tpath.h>
30 #include <yaz/snprintf.h>
32 #include <libxml/xmlversion.h>
33 #include <libxml/parser.h>
34 #include <libxml/tree.h>
35 #include <libxml/xmlIO.h>
36 #include <libxml/xmlreader.h>
37 #include <libxslt/transform.h>
38 #include <libxslt/xsltutils.h>
41 #include <libexslt/exslt.h>
44 #include <idzebra/util.h>
45 #include <idzebra/recctrl.h>
47 /* DOM filter style indexing */
48 #define ZEBRA_DOM_NS "http://indexdata.com/zebra-2.0"
49 static const char *zebra_dom_ns = ZEBRA_DOM_NS;
51 /* DOM filter style indexing */
52 #define ZEBRA_PI_NAME "zebra-2.0"
53 static const char *zebra_pi_name = ZEBRA_PI_NAME;
58 const char *stylesheet;
59 xsltStylesheetPtr stylesheet_xsp;
60 struct convert_s *next;
63 struct filter_extract {
65 struct convert_s *convert;
69 struct convert_s *convert;
72 struct filter_retrieve {
74 const char *identifier;
75 struct convert_s *convert;
76 struct filter_retrieve *next;
79 #define DOM_INPUT_XMLREADER 1
80 #define DOM_INPUT_MARC 2
84 struct convert_s *convert;
88 const char *input_charset;
93 xmlTextReaderPtr reader;
97 struct filter_input *next;
103 const char *profile_path;
106 xmlDocPtr doc_config;
107 struct filter_extract *extract;
108 struct filter_retrieve *retrieve_list;
109 struct filter_input *input_list;
110 struct filter_store *store;
/* Compare an xmlChar string with a plain C string via strcmp(); 0 means
   equal.  The arguments are not parenthesized in the expansion, so pass
   simple expressions only. */
115 #define XML_STRCMP(a,b) strcmp((char*)a, b)
/* Byte length of an xmlChar string via strlen(). */
116 #define XML_STRLEN(a) strlen((char*)a)
/* Loop header: steps ptr along its ->next chain and runs the statement that
   follows only for nodes of type XML_ELEMENT_NODE.  The variable passed in
   is itself advanced by the loop.  The expansion ends in a bare "if", so an
   "else" placed directly after the loop body would bind to that "if". */
119 #define FOR_EACH_ELEMENT(ptr) for (; ptr; ptr = ptr->next) if (ptr->type == XML_ELEMENT_NODE)
/*
 * printf-style logging helper for the DOM filter.
 *
 * level  yaz_log level (callers in this file use YLOG_WARN / YLOG_DEBUG)
 * tinfo  filter state; tinfo->fname is used as message prefix, "none" is
 *        printed when it is null
 * ptr    XML node the message refers to; callers pass 0 when there is none
 * fmt    printf format string followed by its arguments
 *
 * The message is formatted with yaz_vsnprintf() and handed to yaz_log() in
 * one of two forms: "file:line: message", where line is xmlGetLineNo(ptr),
 * or "file: message".
 * NOTE(review): presumably the first form is used only when ptr is
 * non-null - confirm.
 */
121 static void dom_log(int level, struct filter_info *tinfo, xmlNodePtr ptr,
122 const char *fmt, ...)
/* GCC extension: have the compiler check the arguments against fmt
   (format string is argument 4, variadic arguments start at 5) */
124 __attribute__ ((format (printf, 4, 5)))
128 static void dom_log(int level, struct filter_info *tinfo, xmlNodePtr ptr,
129 const char *fmt, ...)
/* size passed is one less than the buffer size */
135 yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
/* form with XML line number */
138 yaz_log(level, "%s:%ld: %s", tinfo->fname ? tinfo->fname : "none",
139 xmlGetLineNo(ptr), buf);
/* form without line number */
143 yaz_log(level, "%s: %s", tinfo->fname ? tinfo->fname : "none", buf);
/*
 * Prepare a string-valued stylesheet parameter.
 *
 * params  parameter array being filled
 * name    parameter name
 * value   parameter value; must not be null (strlen() is applied to it)
 * odr     ODR memory the quoted copy is allocated from
 *
 * The value is wrapped in single quotes.
 * NOTE(review): value is copied verbatim, so a single quote inside value is
 * not escaped - confirm that callers never pass such values.
 */
149 static void set_param_str(const char **params, const char *name,
150 const char *value, ODR odr)
/* 3 extra bytes: opening quote, closing quote and terminating NUL */
152 char *quoted = odr_malloc(odr, 3 + strlen(value));
153 sprintf(quoted, "'%s'", value);
/*
 * Prepare an integer-valued stylesheet parameter.
 *
 * The number is formatted with ZINT_FORMAT and wrapped in single quotes,
 * in a fixed 30-byte buffer allocated from odr.
 */
161 static void set_param_int(const char **params, const char *name,
164 char *quoted = odr_malloc(odr, 30); /* 25 digits enough for 2^64 */
/* adjacent string literals: quote, number format, quote */
167 sprintf(quoted, "'" ZINT_FORMAT "'", value);
/*
 * Allocate and initialize the per-filter state (struct filter_info).
 *
 * Two separate ODR memory handles are created: odr_config, which the
 * configuration parsers in this file allocate from (and which destroy_dom()
 * resets), and odr_record, which filter_extract() resets on every call.
 * All list heads and the configuration document start out empty.
 */
173 static void *filter_init(Res res, RecType recType)
175 struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
177 tinfo->full_name = 0;
178 tinfo->profile_path = 0;
179 tinfo->odr_record = odr_createmem(ODR_ENCODE);
180 tinfo->odr_config = odr_createmem(ODR_ENCODE);
182 tinfo->retrieve_list = 0;
183 tinfo->input_list = 0;
185 tinfo->doc_config = 0;
/*
 * Fetch the text value of an XML attribute if it has the wanted name.
 *
 * attr         attribute to inspect
 * name         attribute name to match (exact strcmp match)
 * dst_content  receives a pointer to the attribute text on a match
 *
 * The pointer stored in *dst_content refers directly into the XML tree (no
 * copy is made), so it is only valid while the owning document is alive.
 * Attributes without a text child node do not match.
 * NOTE(review): callers use the result as a boolean "matched" flag.
 */
194 static int attr_content(struct _xmlAttr *attr, const char *name,
195 const char **dst_content)
197 if (!XML_STRCMP(attr->name, name) && attr->children
198 && attr->children->type == XML_TEXT_NODE)
200 *dst_content = (const char *)(attr->children->content);
/*
 * Release the compiled XSLT stylesheet held by a conversion entry.
 * Entries whose stylesheet_xsp is null are left alone.
 */
206 static void destroy_xsp(struct convert_s *c)
210 if (c->stylesheet_xsp)
211 xsltFreeStylesheet(c->stylesheet_xsp);
/*
 * Tear down everything that was built from the configuration file:
 * compiled stylesheets of the extract, store, input and retrieve sections,
 * the per-input reader / MARC resources, and the parsed configuration
 * document.  The list nodes themselves are allocated from odr_config by the
 * parsers in this file, so they are not freed one by one; the list heads
 * are cleared and odr_config is reset at the end.
 */
216 static void destroy_dom(struct filter_info *tinfo)
220 destroy_xsp(tinfo->extract->convert);
225 destroy_xsp(tinfo->store->convert);
228 if (tinfo->input_list)
230 struct filter_input *i_ptr;
231 for (i_ptr = tinfo->input_list; i_ptr; i_ptr = i_ptr->next)
/* XML reader input: free the text reader if one is open */
235 case DOM_INPUT_XMLREADER:
236 if (i_ptr->u.xmlreader.reader)
237 xmlFreeTextReader(i_ptr->u.xmlreader.reader);
/* MARC input: close the character set converter and the MARC handle */
240 yaz_iconv_close(i_ptr->u.marc.iconv);
241 yaz_marc_destroy(i_ptr->u.marc.handle);
244 destroy_xsp(i_ptr->convert);
246 tinfo->input_list = 0;
248 if (tinfo->retrieve_list)
250 struct filter_retrieve *r_ptr;
251 for (r_ptr = tinfo->retrieve_list; r_ptr; r_ptr = r_ptr->next)
252 destroy_xsp(r_ptr->convert);
253 tinfo->retrieve_list = 0;
256 if (tinfo->doc_config)
258 xmlFreeDoc(tinfo->doc_config);
259 tinfo->doc_config = 0;
/* drop all configuration memory allocated from odr_config */
261 odr_reset(tinfo->odr_config);
/*
 * Parse a sequence of <xslt stylesheet="..."/> elements into a chain of
 * conversion steps.
 *
 * tinfo  filter state; conversion entries are allocated from odr_config
 * ptr    first sibling node to inspect; non-element nodes are skipped
 * l      where the resulting chain is stored
 *
 * For every <xslt> element the @stylesheet attribute is read, the file is
 * located with yaz_filepath_resolve() (tinfo->profile_path is reported in
 * the "not found" warning) and compiled with xsltParseStylesheetFile().
 * Warnings are logged for unknown attributes, a stylesheet that cannot be
 * found or parsed, a missing @stylesheet attribute, and any element other
 * than <xslt>.
 */
264 static ZEBRA_RES parse_convert(struct filter_info *tinfo, xmlNodePtr ptr,
265 struct convert_s **l)
268 FOR_EACH_ELEMENT(ptr) {
269 if (!XML_STRCMP(ptr->name, "xslt"))
271 struct _xmlAttr *attr;
273 = odr_malloc(tinfo->odr_config, sizeof(*p));
/* no compiled stylesheet yet */
277 p->stylesheet_xsp = 0;
279 for (attr = ptr->properties; attr; attr = attr->next)
280 if (attr_content(attr, "stylesheet", &p->stylesheet))
284 dom_log(YLOG_WARN, tinfo, ptr,
285 "bad attribute @%s", attr->name);
/* buffer for the resolved stylesheet path */
289 char tmp_xslt_full_name[1024];
290 if (!yaz_filepath_resolve(p->stylesheet,
295 dom_log(YLOG_WARN, tinfo, 0,
296 "stylesheet %s not found in "
299 tinfo->profile_path);
304 = xsltParseStylesheetFile((const xmlChar*)
306 if (!p->stylesheet_xsp)
308 dom_log(YLOG_WARN, tinfo, 0,
309 "could not parse xslt stylesheet %s",
316 dom_log(YLOG_WARN, tinfo, ptr,
317 "missing attribute 'stylesheet' ");
325 dom_log(YLOG_WARN, tinfo, ptr,
326 "bad element '%s', expected <xslt>", ptr->name);
/*
 * Run a document through a chain of XSLT conversion steps.
 *
 * Each entry of the convert chain is applied in list order with
 * xsltApplyStylesheet().
 *
 * last_xsp  receives the stylesheet of the step applied most recently;
 *           callers use it to serialize the result with that stylesheet's
 *           output settings.  Several callers in this file pass 0 here.
 */
333 static ZEBRA_RES perform_convert(struct filter_info *tinfo,
334 struct convert_s *convert,
337 xsltStylesheetPtr *last_xsp)
339 for (; convert; convert = convert->next)
341 xmlDocPtr res_doc = xsltApplyStylesheet(convert->stylesheet_xsp,
344 *last_xsp = convert->stylesheet_xsp;
/*
 * Append a new input description to the end of tinfo->input_list.
 *
 * The node is allocated from odr_config, so it is reclaimed together with
 * the rest of the configuration memory rather than freed individually.
 */
351 static struct filter_input *new_input(struct filter_info *tinfo, int type)
353 struct filter_input *p;
/* np walks the "next" links until it points at the terminating null link */
354 struct filter_input **np = &tinfo->input_list;
355 for (;*np; np = &(*np)->next)
/* allocate the node and hook it into that link in one step */
357 p = *np = odr_malloc(tinfo->odr_config, sizeof(*p));
/*
 * Parse the children of an <input> element of the configuration.
 *
 * Two element kinds are recognized:
 *   <marc inputcharset="...">  MARC input; the character set defaults to
 *                              "marc-8" and is converted to UTF-8 through
 *                              yaz_iconv
 *   <xmlreader level="...">    XML input; split level defaults to 0
 * Anything else is reported with a warning.  Conversion steps for the input
 * are parsed with parse_convert().
 */
366 static ZEBRA_RES parse_input(struct filter_info *tinfo, xmlNodePtr ptr,
367 const char *syntax, const char *name)
369 FOR_EACH_ELEMENT(ptr) {
370 if (!XML_STRCMP(ptr->name, "marc"))
372 yaz_iconv_t iconv = 0;
/* default source character set when @inputcharset is absent */
373 const char *input_charset = "marc-8";
374 struct _xmlAttr *attr;
376 for (attr = ptr->properties; attr; attr = attr->next)
378 if (attr_content(attr, "inputcharset", &input_charset))
382 dom_log(YLOG_WARN, tinfo, ptr,
383 "bad attribute @%s, expected @inputcharset",
/* converter from the record character set to UTF-8 */
387 iconv = yaz_iconv_open("utf-8", input_charset);
390 dom_log(YLOG_WARN, tinfo, ptr,
391 "unsupported @charset '%s'", input_charset);
396 struct filter_input *p
397 = new_input(tinfo, DOM_INPUT_MARC);
398 p->u.marc.handle = yaz_marc_create();
399 p->u.marc.iconv = iconv;
/* let the MARC handle convert field data through iconv */
401 yaz_marc_iconv(p->u.marc.handle, p->u.marc.iconv);
405 parse_convert(tinfo, ptr, &p->convert);
410 else if (!XML_STRCMP(ptr->name, "xmlreader"))
412 struct filter_input *p
413 = new_input(tinfo, DOM_INPUT_XMLREADER);
414 struct _xmlAttr *attr;
415 const char *level_str = 0;
/* no splitting and no open reader until configured / first used */
417 p->u.xmlreader.split_level = 0;
418 p->u.xmlreader.reader = 0;
420 for (attr = ptr->properties; attr; attr = attr->next)
422 if (attr_content(attr, "level", &level_str))
426 dom_log(YLOG_WARN, tinfo, ptr,
427 "bad attribute @%s, expected @level",
/* atoi() yields 0 for non-numeric text, i.e. the same as the default */
432 p->u.xmlreader.split_level = atoi(level_str);
436 parse_convert(tinfo, ptr, &p->convert);
441 dom_log(YLOG_WARN, tinfo, ptr,
442 "bad element <%s>, expected <marc>|<xmlreader>",
/*
 * Load and interpret the DOM filter configuration file.
 *
 * tinfo  filter state to fill in
 * fname  configuration file name as given by the caller
 *
 * The file name is resolved against tinfo->profile_path with
 * yaz_filepath_resolve(); if that does not succeed the name is used as
 * given.  The file is parsed with xmlParseFile() and must have a <dom>
 * root element.  Recognized sections are <extract>, <retrieve>, <store>
 * and <input>; each has its <xslt> children (or <marc>/<xmlreader> for
 * <input>) parsed by parse_convert() / parse_input().  Unknown attributes
 * and elements are reported as warnings.
 */
450 static ZEBRA_RES parse_dom(struct filter_info *tinfo, const char *fname)
452 char tmp_full_name[1024];
/* keep private copies of the names in configuration memory */
456 tinfo->fname = odr_strdup(tinfo->odr_config, fname);
458 if (yaz_filepath_resolve(tinfo->fname, tinfo->profile_path,
459 NULL, tmp_full_name))
460 tinfo->full_name = odr_strdup(tinfo->odr_config, tmp_full_name);
/* fallback: use the name exactly as given */
462 tinfo->full_name = odr_strdup(tinfo->odr_config, tinfo->fname);
464 yaz_log(YLOG_LOG, "%s dom filter: "
465 "loading config file %s", tinfo->fname, tinfo->full_name);
467 doc = xmlParseFile(tinfo->full_name);
470 yaz_log(YLOG_WARN, "%s: dom filter: "
471 "failed to parse config file %s",
472 tinfo->fname, tinfo->full_name);
475 /* save because we store ptrs to the content */
476 tinfo->doc_config = doc;
478 ptr = xmlDocGetRootElement(doc);
479 if (!ptr || ptr->type != XML_ELEMENT_NODE
480 || XML_STRCMP(ptr->name, "dom"))
482 dom_log(YLOG_WARN, tinfo, ptr,
483 "bad root element <%s>, expected root element <dom>",
489 FOR_EACH_ELEMENT(ptr) {
490 if (!XML_STRCMP(ptr->name, "extract"))
493 <extract name="index">
494 <xslt stylesheet="first.xsl"/>
495 <xslt stylesheet="second.xsl"/>
498 struct _xmlAttr *attr;
499 struct filter_extract *f =
500 odr_malloc(tinfo->odr_config, sizeof(*f));
505 for (attr = ptr->properties; attr; attr = attr->next)
507 if (attr_content(attr, "name", &f->name))
511 dom_log(YLOG_WARN, tinfo, ptr,
512 "bad attribute @%s, expected @name",
516 parse_convert(tinfo, ptr->children, &f->convert);
518 else if (!XML_STRCMP(ptr->name, "retrieve"))
522 <xslt stylesheet="some.xsl"/>
523 <xslt stylesheet="some.xsl"/>
526 struct _xmlAttr *attr;
527 struct filter_retrieve **fp = &tinfo->retrieve_list;
528 struct filter_retrieve *f =
529 odr_malloc(tinfo->odr_config, sizeof(*f));
540 for (attr = ptr->properties; attr; attr = attr->next)
542 if (attr_content(attr, "identifier",
545 else if (attr_content(attr, "name", &f->name))
549 dom_log(YLOG_WARN, tinfo, ptr,
550 "bad attribute @%s, expected @identifier|@name",
554 parse_convert(tinfo, ptr->children, &f->convert);
556 else if (!XML_STRCMP(ptr->name, "store"))
560 <xslt stylesheet="some.xsl"/>
561 <xslt stylesheet="some.xsl"/>
564 struct filter_store *f =
565 odr_malloc(tinfo->odr_config, sizeof(*f));
569 parse_convert(tinfo, ptr->children, &f->convert);
571 else if (!XML_STRCMP(ptr->name, "input"))
575 <xmlreader level="1"/>
577 <input syntax="usmarc">
578 <marc inputcharset="marc-8"/>
581 struct _xmlAttr *attr;
582 const char *syntax = 0;
583 const char *name = 0;
584 for (attr = ptr->properties; attr; attr = attr->next)
586 if (attr_content(attr, "syntax", &syntax))
588 else if (attr_content(attr, "name", &name))
592 dom_log(YLOG_WARN, tinfo, ptr,
593 "bad attribute @%s, expected @syntax|@name",
597 parse_input(tinfo, ptr->children, syntax, name);
601 dom_log(YLOG_WARN, tinfo, ptr,
603 "expected <extract>|<input>|<retrieve>|<store>",
/*
 * Find the <retrieve> configuration entry for a requested schema /
 * element set name.
 *
 * The retrieve list is searched in order; an entry matches when est equals
 * its identifier or its name (exact strcmp match).  When no est is given
 * the first entry of the list is used.
 */
611 static struct filter_retrieve *lookup_retrieve(struct filter_info *tinfo,
614 struct filter_retrieve *f = tinfo->retrieve_list;
616 /* return first schema if no est is provided */
619 for (; f; f = f->next)
621 /* find requested schema */
624 if (f->identifier && !strcmp(f->identifier, est))
626 if (f->name && !strcmp(f->name, est))
/*
 * Configure the filter from its argument string, which names the DOM
 * configuration file.
 *
 * clientData  the struct filter_info for this filter instance
 * res         resource settings; "profilePath" is read from here
 * args        configuration file name
 *
 * A warning is logged when no configuration file is supplied.  The
 * comparison against tinfo->fname detects a request for the file that is
 * already recorded.
 * NOTE(review): presumably that case returns early without re-parsing -
 * confirm.
 */
633 static ZEBRA_RES filter_config(void *clientData, Res res, const char *args)
635 struct filter_info *tinfo = clientData;
638 yaz_log(YLOG_WARN, "dom filter: need config file");
642 if (tinfo->fname && !strcmp(args, tinfo->fname))
/* search path used when resolving the config file and stylesheets */
645 tinfo->profile_path = res_get(res, "profilePath");
648 return parse_dom(tinfo, args);
/*
 * Destroy a filter instance: releases both ODR memory handles created by
 * filter_init().
 */
651 static void filter_destroy(void *clientData)
653 struct filter_info *tinfo = clientData;
655 odr_destroy(tinfo->odr_config);
656 odr_destroy(tinfo->odr_record);
/*
 * libxml2 input-read callback for record extraction.
 *
 * context is the struct recExtractCtrl passed to xmlReaderForIO() /
 * xmlReadIO(); up to len bytes are read into buffer through the record
 * stream's readf function and its result is returned unchanged.
 */
660 static int ioread_ex(void *context, char *buffer, int len)
662 struct recExtractCtrl *p = context;
663 return p->stream->readf(p->stream, buffer, len);
666 static int ioclose_ex(void *context)
672 /* DOM filter style indexing */
/*
 * Variant of attr_content() that yields the attribute text as xmlChar *.
 *
 * On a name match with a text child node, *dst_content is set to point at
 * the attribute text inside the XML tree (no copy is made).
 */
673 static int attr_content_xml(struct _xmlAttr *attr, const char *name,
674 xmlChar **dst_content)
676 if (0 == XML_STRCMP(attr->name, name) && attr->children
677 && attr->children->type == XML_TEXT_NODE)
679 *dst_content = (attr->children->content);
686 /* DOM filter style indexing */
/*
 * Index the text content of an XML node under one or more indexes.
 *
 * index_p holds a space separated list of index specifications, each of
 * the form "name" or "name:type".  The node text (xmlNodeGetContent) is
 * set as the term once, and for every specification the index name and
 * the first character of the type are stored in recword before the term
 * is handed to extctr->tokenAdd.
 */
687 static void index_value_of(struct filter_info *tinfo,
688 struct recExtractCtrl *extctr,
/* NOTE(review): xmlNodeGetContent() is documented to return NULL when no
   content is available, and the result is passed straight to strlen() on
   the next line; its documentation also says the returned string must be
   released with xmlFree() - confirm both are handled. */
693 xmlChar *text = xmlNodeGetContent(node);
694 size_t text_len = strlen((const char *)text);
697 /* if there is no text, we do not need to proceed */
700 xmlChar *look = index_p;
707 /* assigning text to be indexed */
708 recword->term_buf = (const char *)text;
709 recword->term_len = text_len;
711 /* parsing all index name/type pairs */
712 /* may not start with ' ' or ':' */
713 while (*look && ' ' != *look && ':' != *look)
715 /* setting name and type to zero */
719 /* parsing one index name */
721 while (*look && ':' != *look && ' ' != *look)
/* NOTE(review): the copy length comes from the input string; confirm that
   it is bounded by the size of the destination buffer. */
726 strncpy((char *)index, (const char *)bval, eval - bval);
/* strncpy() does not terminate here, so terminate explicitly */
727 index[eval - bval] = '\0';
730 /* parsing one index type, if existing */
736 while (*look && ' ' != *look)
/* NOTE(review): same bound question as for the index name above */
741 strncpy((char *)type, (const char *)bval, eval - bval);
742 type[eval - bval] = '\0';
745 /* actually indexing the text given */
746 dom_log(YLOG_DEBUG, tinfo, 0,
747 "INDEX '%s:%s' '%s'",
750 recword->index_name = (const char *)index;
/* only the first character of the type string is used */
752 recword->index_type = *type;
753 (extctr->tokenAdd)(recword);
755 /* eat whitespaces */
756 if (*look && ' ' == *look && *(look+1))
767 /* DOM filter style indexing */
/*
 * Transfer record level information (id, static rank) to the extract
 * control block.
 *
 * id_p    record id; copied to extctr->match_criteria
 * rank_p  static rank as decimal text; converted with atozint()
 * type_p  record type; only logged here, the handling sketched in the
 *         commented-out code below is not active
 *
 * NOTE(review): the debug log prints all three values with %s, and one
 * caller passes 0 for type_p - confirm that the logging routine tolerates
 * null string arguments.
 */
768 static void set_record_info(struct filter_info *tinfo,
769 struct recExtractCtrl *extctr,
774 dom_log(YLOG_DEBUG, tinfo, 0,
775 "RECORD id=%s rank=%s type=%s",
776 id_p, rank_p, type_p);
/* %255s: copies at most 255 characters and stops at the first whitespace */
779 sscanf((const char *)id_p, "%255s", extctr->match_criteria);
782 extctr->staticrank = atozint((const char *)rank_p);
784 /* if (!strcmp("update", type_str)) */
785 /* index_node(tinfo, ctrl, ptr, recword); */
786 /* else if (!strcmp("delete", type_str)) */
787 /* dom_log(YLOG_WARN, tinfo, ptr, "dom filter delete: to be implemented"); */
789 /* dom_log(YLOG_WARN, tinfo, ptr, "dom filter: unknown record type '%s'", */
795 /* DOM filter style indexing */
/*
 * Handle an element that lives in the Zebra DOM namespace (zebra_dom_ns).
 *
 *   <index name="...">             index the element's text content under
 *                                  the listed indexes (index_value_of)
 *   <record id= rank= type=>       pass record information on to
 *                                  set_record_info; a @type other than
 *                                  "update" is reported with a warning
 * Any other element name in the namespace, and any unexpected attribute,
 * is reported with a warning.  Nodes outside the namespace are ignored.
 */
796 static void process_xml_element_zebra_node(struct filter_info *tinfo,
797 struct recExtractCtrl *extctr,
801 if (node->type == XML_ELEMENT_NODE && node->ns && node->ns->href
802 && 0 == XML_STRCMP(node->ns->href, zebra_dom_ns))
804 if (0 == XML_STRCMP(node->name, "index"))
806 xmlChar *index_p = 0;
808 struct _xmlAttr *attr;
809 for (attr = node->properties; attr; attr = attr->next)
811 if (attr_content_xml(attr, "name", &index_p))
813 index_value_of(tinfo, extctr, recword,node, index_p);
817 dom_log(YLOG_WARN, tinfo, node,
818 "bad attribute @%s, expected @name",
823 else if (0 == XML_STRCMP(node->name, "record"))
829 struct _xmlAttr *attr;
830 for (attr = node->properties; attr; attr = attr->next)
832 if (attr_content_xml(attr, "id", &id_p))
834 else if (attr_content_xml(attr, "rank", &rank_p))
836 else if (attr_content_xml(attr, "type", &type_p))
840 dom_log(YLOG_WARN, tinfo, node,
841 "bad attribute @%s, expected @id|@rank|@type",
/* "update" is the only record type accepted without a warning */
845 if (type_p && 0 != strcmp("update", (const char *)type_p))
847 dom_log(YLOG_WARN, tinfo, node,
848 "attribute @%s, only implemented '@type='update'",
852 set_record_info(tinfo, extctr, id_p, rank_p, type_p);
856 dom_log(YLOG_WARN, tinfo, node,
858 " expected <record>|<index> in namespace '%s'",
859 node->name, zebra_dom_ns);
865 /* DOM filter style indexing */
/*
 * Interpret a processing instruction whose target is zebra_pi_name.
 *
 * Two instruction kinds are parsed from the PI content:
 *   record [id=VALUE] [rank=VALUE]   values run up to the next space; the
 *                                    result is passed to set_record_info
 *                                    with no record type
 *   index ...                        index instructions for the caller
 * Content that cannot be parsed is reported with a warning.  PIs with a
 * different target are ignored.
 */
866 static void process_xml_pi_node(struct filter_info *tinfo,
867 struct recExtractCtrl *extctr,
871 /* if right PI name, continue parsing PI */
872 if (0 == strcmp(zebra_pi_name, (const char *)node->name))
874 xmlChar *pi_p = node->content;
/* look is the parse cursor into the PI content */
875 xmlChar *look = pi_p;
880 /* parsing PI record instructions */
881 if (0 == strncmp((const char *)look, "record", 6))
/* skip spaces, but never step onto the terminating NUL */
894 while (*look && ' ' == *look && *(look+1))
897 /* parse possible id */
898 if (*look && 0 == strncmp((const char *)look, "id=", 3))
902 while (*look && ' ' != *look)
/* NOTE(review): confirm the copy length is bounded by the size of id */
905 strncpy((char *)id, (const char *)bval, eval - bval);
906 id[eval - bval] = '\0';
910 while (*look && ' ' == *look && *(look+1))
913 /* parse possible rank */
914 if (*look && 0 == strncmp((const char *)look, "rank=", 5))
918 while (*look && ' ' != *look)
/* NOTE(review): confirm the copy length is bounded by the size of rank */
921 strncpy((char *)rank, (const char *)bval, eval - bval);
922 rank[eval - bval] = '\0';
926 while (*look && ' ' == *look && *(look+1))
/* anything left over at this point could not be parsed */
929 if (look && '\0' != *look)
931 dom_log(YLOG_WARN, tinfo, node,
932 "content '%s', can not parse '%s'",
936 set_record_info(tinfo, extctr, id, rank, 0);
939 /* parsing index instruction */
940 else if (0 == strncmp((const char *)look, "index", 5))
945 while (*look && ' ' == *look && *(look+1))
948 /* export index instructions to outside */
953 dom_log(YLOG_WARN, tinfo, node,
954 "content '%s', can not parse '%s'",
960 /* DOM filter style indexing */
/*
 * Recursively walk an XML subtree and carry out the indexing instructions
 * found in it.
 *
 * The node itself is first checked for being a Zebra namespace element.
 * Its children are then visited in document order: processing instructions
 * are interpreted by process_xml_pi_node(), which may leave an index
 * instruction in index_p; that instruction is applied to an element that
 * follows, and every child element is walked recursively.
 */
961 static void process_xml_element_node(struct filter_info *tinfo,
962 struct recExtractCtrl *extctr,
966 /* remember indexing instruction from PI to next element node */
967 xmlChar *index_p = 0;
969 /* check if we are an element node in the special zebra namespace
970 and either set record data or index value-of node content*/
971 process_xml_element_zebra_node(tinfo, extctr, recword, node);
973 /* loop through kid nodes */
974 for (node = node->children; node; node = node->next)
976 /* check and set PI record and index instructions */
977 if (node->type == XML_PI_NODE)
979 process_xml_pi_node(tinfo, extctr, node, &index_p);
981 else if (node->type == XML_ELEMENT_NODE)
983 /* if there was a PI index instruction before this element */
986 index_value_of(tinfo, extctr, recword, node, index_p);
989 process_xml_element_node(tinfo, extctr, recword,node);
997 /* DOM filter style indexing */
/*
 * Index a complete XML document.
 *
 * A single RecWord is initialized through extctr->init and reused for all
 * terms.  When extctr->flagShowRecords is set the document is also dumped
 * to stdout.  The tree walk starts at the document node itself.
 */
998 static void extract_dom_doc_node(struct filter_info *tinfo,
999 struct recExtractCtrl *extctr,
1005 /* only need to do the initialization once, reuse recword for all terms */
1007 (*extctr->init)(extctr, &recword);
1009 if (extctr->flagShowRecords)
1011 xmlDocDumpMemory(doc, &buf_out, &len_out);
1012 fwrite(buf_out, len_out, 1, stdout);
/* the document node is passed as a generic node so that the walk also
   covers top-level processing instructions */
1016 process_xml_element_node(tinfo, extctr, &recword, (xmlNodePtr)doc);
/*
 * Run one input document through the configured pipeline and index it.
 *
 * Steps, in order:
 *   1. input conversion  - the stylesheets of the input section
 *   2. store conversion  - applied to a copy of the document; the result
 *                          is serialized and handed to p->setStoreData
 *                          (and echoed to stdout when p->flagShowRecords)
 *   3. extract conversion - the stylesheets of the extract section
 *   4. indexing          - extract_dom_doc_node()
 *
 * The "schema" stylesheet parameter is set to zebra_dom_ns.
 */
1022 static int convert_extract_doc(struct filter_info *tinfo,
1023 struct filter_input *input,
1024 struct recExtractCtrl *p,
1030 const char *params[10];
1031 xsltStylesheetPtr last_xsp = 0;
1032 xmlDocPtr store_doc = 0;
1035 set_param_str(params, "schema", zebra_dom_ns, tinfo->odr_record);
1037 /* input conversion */
1038 perform_convert(tinfo, input->convert, params, &doc, 0);
1042 /* store conversion */
/* work on a deep copy so the stored form does not affect indexing */
1043 store_doc = xmlCopyDoc(doc, 1);
1044 perform_convert(tinfo, tinfo->store->convert,
1045 params, &store_doc, &last_xsp);
/* two serializations: with the last stylesheet's output settings, or a
   plain XML dump; the original document is used when there is no
   store_doc */
1049 xsltSaveResultToString(&buf_out, &len_out,
1050 store_doc ? store_doc : doc, last_xsp);
1052 xmlDocDumpMemory(store_doc ? store_doc : doc, &buf_out, &len_out);
1053 if (p->flagShowRecords)
1054 fwrite(buf_out, len_out, 1, stdout);
1055 (*p->setStoreData)(p, buf_out, len_out);
1059 xmlFreeDoc(store_doc);
1061 /* extract conversion */
1062 perform_convert(tinfo, tinfo->extract->convert, params, &doc, 0);
1064 /* finally, do the indexing */
1067 extract_dom_doc_node(tinfo, p, doc);
1068 /* extract_doc_alvis(tinfo, p, doc); */
1072 return RECCTRL_EXTRACT_OK;
/*
 * Extract the next record from an XML stream that holds several records.
 *
 * A streaming xmlTextReader is (re)created on the first record of a
 * stream.  The reader is advanced until an element at depth
 * input->u.xmlreader.split_level is found; that element is expanded,
 * copied into a fresh document and processed by convert_extract_doc().
 * When the reader runs out of nodes it is freed and RECCTRL_EXTRACT_EOF is
 * returned.  A missing reader, or an element that cannot be expanded,
 * gives RECCTRL_EXTRACT_ERROR_GENERIC.
 */
1075 static int extract_xml_split(struct filter_info *tinfo,
1076 struct filter_input *input,
1077 struct recExtractCtrl *p)
1081 if (p->first_record)
/* drop a reader left over from a previous stream */
1083 if (input->u.xmlreader.reader)
1084 xmlFreeTextReader(input->u.xmlreader.reader);
1085 input->u.xmlreader.reader = xmlReaderForIO(ioread_ex, ioclose_ex,
1086 p /* I/O handler */,
1092 if (!input->u.xmlreader.reader)
1093 return RECCTRL_EXTRACT_ERROR_GENERIC;
1095 ret = xmlTextReaderRead(input->u.xmlreader.reader);
1098 int type = xmlTextReaderNodeType(input->u.xmlreader.reader);
1099 int depth = xmlTextReaderDepth(input->u.xmlreader.reader);
/* a record starts at an element located exactly at the split depth */
1100 if (type == XML_READER_TYPE_ELEMENT &&
1101 input->u.xmlreader.split_level == depth)
1104 = xmlTextReaderExpand(input->u.xmlreader.reader);
/* the expanded subtree belongs to the reader, so a deep copy is made
   before it becomes the root of the new document */
1107 xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
1108 xmlDocPtr doc = xmlNewDoc((const xmlChar*) "1.0");
1110 xmlDocSetRootElement(doc, ptr2);
1112 return convert_extract_doc(tinfo, input, p, doc);
1116 xmlFreeTextReader(input->u.xmlreader.reader);
1117 input->u.xmlreader.reader = 0;
1118 return RECCTRL_EXTRACT_ERROR_GENERIC;
1121 ret = xmlTextReaderRead(input->u.xmlreader.reader);
/* no more nodes: release the reader and signal end of stream */
1123 xmlFreeTextReader(input->u.xmlreader.reader);
1124 input->u.xmlreader.reader = 0;
1125 return RECCTRL_EXTRACT_EOF;
/*
 * Extract a stream that consists of exactly one XML record.
 *
 * On the first call for a stream the whole input is parsed with xmlReadIO()
 * and processed by convert_extract_doc(); every later call returns
 * RECCTRL_EXTRACT_EOF.
 * NOTE(review): XML_PARSE_NOENT makes libxml2 substitute entities and
 * XML_PARSE_XINCLUDE enables XInclude processing - confirm that record
 * input is trusted, since these options let a document pull in external
 * content.
 */
1128 static int extract_xml_full(struct filter_info *tinfo,
1129 struct filter_input *input,
1130 struct recExtractCtrl *p)
1132 if (p->first_record) /* only one record per stream */
1134 xmlDocPtr doc = xmlReadIO(ioread_ex, ioclose_ex,
1135 p /* I/O handler */,
1138 XML_PARSE_XINCLUDE|XML_PARSE_NOENT);
1141 return RECCTRL_EXTRACT_ERROR_GENERIC;
1143 return convert_extract_doc(tinfo, input, p, doc);
1146 return RECCTRL_EXTRACT_EOF;
/*
 * Extract one ISO2709 (MARC) record from the stream.
 *
 * The first five bytes of a record give its total length in decimal.
 * Leading bytes that are not digits are skipped one at a time with a
 * warning.  The rest of the record is then read, decoded with
 * yaz_marc_read_iso2709(), converted to an XML tree with
 * yaz_marc_write_xml() and processed by convert_extract_doc().
 *
 * Returns RECCTRL_EXTRACT_EOF when the stream ends before a length field
 * could be read, RECCTRL_EXTRACT_ERROR_GENERIC for a length below 25, a
 * truncated record or a decoding failure.
 * NOTE(review): a five digit length allows records of up to 99999 bytes;
 * confirm that buf is large enough for that.
 */
1149 static int extract_iso2709(struct filter_info *tinfo,
1150 struct filter_input *input,
1151 struct recExtractCtrl *p)
1157 if (p->stream->readf(p->stream, buf, 5) != 5)
1158 return RECCTRL_EXTRACT_EOF;
/* resynchronize: the record must start with a digit */
1159 while (*buf < '0' || *buf > '9')
1163 dom_log(YLOG_WARN, tinfo, 0,
1164 "MARC: Skipping bad byte %d (0x%02X)",
1165 *buf & 0xff, *buf & 0xff);
1166 for (i = 0; i<4; i++)
/* refill the fifth byte of the length field */
1169 if (p->stream->readf(p->stream, buf+4, 1) != 1)
1170 return RECCTRL_EXTRACT_EOF;
1172 record_length = atoi_n (buf, 5);
1173 if (record_length < 25)
1175 dom_log(YLOG_WARN, tinfo, 0,
1176 "MARC record length < 25, is %d", record_length);
1177 return RECCTRL_EXTRACT_ERROR_GENERIC;
/* the five length bytes are already in buf; read the remainder */
1179 read_bytes = p->stream->readf(p->stream, buf+5, record_length-5);
1180 if (read_bytes < record_length-5)
1182 dom_log(YLOG_WARN, tinfo, 0,
1183 "couldn't read whole MARC record");
1184 return RECCTRL_EXTRACT_ERROR_GENERIC;
1186 r = yaz_marc_read_iso2709(input->u.marc.handle, buf, record_length);
1187 if (r < record_length)
1189 dom_log (YLOG_WARN, tinfo, 0,
1190 "parsing of MARC record failed r=%d length=%d",
1192 return RECCTRL_EXTRACT_ERROR_GENERIC;
/* turn the decoded record into an XML tree and wrap it in a document */
1198 yaz_marc_write_xml(input->u.marc.handle, &root_ptr, 0, 0, 0);
1199 rdoc = xmlNewDoc((const xmlChar*) "1.0");
1200 xmlDocSetRootElement(rdoc, root_ptr);
1201 return convert_extract_doc(tinfo, input, p, rdoc);
1203 return RECCTRL_EXTRACT_OK;
/*
 * Extraction entry point of the filter.
 *
 * Only the first entry of tinfo->input_list is consulted.  Per-record ODR
 * memory is reset, then the call is dispatched on the input type:
 *   DOM_INPUT_XMLREADER with split level 0  -> extract_xml_full
 *   DOM_INPUT_XMLREADER with another level  -> extract_xml_split
 *   DOM_INPUT_MARC                          -> extract_iso2709
 * Anything else yields RECCTRL_EXTRACT_ERROR_GENERIC.
 */
1206 static int filter_extract(void *clientData, struct recExtractCtrl *p)
1208 struct filter_info *tinfo = clientData;
1209 struct filter_input *input = tinfo->input_list;
1212 return RECCTRL_EXTRACT_ERROR_GENERIC;
/* release memory allocated for the previous record */
1214 odr_reset(tinfo->odr_record);
1217 case DOM_INPUT_XMLREADER:
1218 if (input->u.xmlreader.split_level == 0)
1219 return extract_xml_full(tinfo, input, p);
1221 return extract_xml_split(tinfo, input, p);
1223 case DOM_INPUT_MARC:
1224 return extract_iso2709(tinfo, input, p);
1226 return RECCTRL_EXTRACT_ERROR_GENERIC;
/*
 * libxml2 input-read callback for record retrieval.
 *
 * context is the struct recRetrieveCtrl passed to xmlReadIO(); up to len
 * bytes are read into buffer through the record stream's readf function
 * and its result is returned unchanged.
 */
1229 static int ioread_ret(void *context, char *buffer, int len)
1231 struct recRetrieveCtrl *p = context;
1232 return p->stream->readf(p->stream, buffer, len);
1235 static int ioclose_ret(void *context)
/*
 * Retrieval entry point of the filter.
 *
 * The element set name is taken from the request (simple generic name, or
 * the element set name of a complex spec) and mapped to a <retrieve>
 * configuration entry with lookup_retrieve().  The stored record is parsed
 * with xmlReadIO(), transformed by the entry's stylesheets, and the result
 * is copied into p->rec_buf (allocated from p->odr) as XML or SUTRS.
 *
 * Stylesheet parameters set here: id, filename, rank (only when
 * p->staticrank is non-negative), schema, score and size.
 *
 * Failures are reported through p->diagnostic.
 */
1240 static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
1242 /* const char *esn = zebra_dom_ns; */
1243 const char *esn = 0;
1244 const char *params[32];
1245 struct filter_info *tinfo = clientData;
1247 struct filter_retrieve *retrieve;
1248 xsltStylesheetPtr last_xsp = 0;
/* pick up the requested element set name, if the request carries one */
1252 if (p->comp->which == Z_RecordComp_simple
1253 && p->comp->u.simple->which == Z_ElementSetNames_generic)
1255 esn = p->comp->u.simple->u.generic;
1257 else if (p->comp->which == Z_RecordComp_complex
1258 && p->comp->u.complex->generic->elementSpec
1259 && p->comp->u.complex->generic->elementSpec->which ==
1260 Z_ElementSpec_elementSetName)
1262 esn = p->comp->u.complex->generic->elementSpec->u.elementSetName;
1265 retrieve = lookup_retrieve(tinfo, esn);
1269 YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
1274 set_param_int(params, "id", p->localno, p->odr);
1276 set_param_str(params, "filename", p->fname, p->odr);
1277 if (p->staticrank >= 0)
1278 set_param_int(params, "rank", p->staticrank, p->odr);
/* schema: the requested name, else the entry's name, else its
   identifier, else the empty string */
1281 set_param_str(params, "schema", esn, p->odr);
1284 set_param_str(params, "schema", retrieve->name, p->odr);
1285 else if (retrieve->identifier)
1286 set_param_str(params, "schema", retrieve->identifier, p->odr);
1288 set_param_str(params, "schema", "", p->odr);
1291 set_param_int(params, "score", p->score, p->odr);
1292 set_param_int(params, "size", p->recordSize, p->odr);
/* parse the stored record from the retrieval stream */
1294 doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */,
1297 XML_PARSE_XINCLUDE|XML_PARSE_NOENT);
1300 p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
1304 /* retrieve conversion */
1305 perform_convert(tinfo, retrieve->convert, params, &doc, &last_xsp);
1308 p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
/* XML requested, or no particular record syntax requested */
1310 else if (p->input_format == VAL_NONE || p->input_format == VAL_TEXT_XML)
1316 xsltSaveResultToString(&buf_out, &len_out, doc, last_xsp);
1318 xmlDocDumpMemory(doc, &buf_out, &len_out);
1320 p->output_format = VAL_TEXT_XML;
1321 p->rec_len = len_out;
/* the record is copied into request memory owned by p->odr */
1322 p->rec_buf = odr_malloc(p->odr, p->rec_len);
1323 memcpy(p->rec_buf, buf_out, p->rec_len);
/* NOTE(review): the branch above tests p->input_format while this one
   tests p->output_format - confirm that this asymmetry is intended */
1326 else if (p->output_format == VAL_SUTRS)
1332 xsltSaveResultToString(&buf_out, &len_out, doc, last_xsp);
1334 xmlDocDumpMemory(doc, &buf_out, &len_out);
1336 p->output_format = VAL_SUTRS;
1337 p->rec_len = len_out;
1338 p->rec_buf = odr_malloc(p->odr, p->rec_len);
1339 memcpy(p->rec_buf, buf_out, p->rec_len);
/* any other requested record syntax is not supported */
1345 p->diagnostic = YAZ_BIB1_RECORD_SYNTAX_UNSUPP;
1351 static struct recType filter_type = {
1362 #ifdef IDZEBRA_STATIC_DOM
1375 * indent-tabs-mode: nil
1377 * vim: shiftwidth=4 tabstop=8 expandtab