1 /* $Id: xslt.c,v 1.6 2005-05-31 17:36:16 adam Exp $
2 Copyright (C) 1995-2005
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
27 #include <yaz/diagbib1.h>
28 #include <libxml/xmlversion.h>
29 #include <libxml/parser.h>
30 #include <libxml/tree.h>
31 #include <libxml/xmlreader.h>
32 #include <libxslt/transform.h>
34 #include <idzebra/util.h>
35 #include <idzebra/recctrl.h>
/* One schema entry read from the filter configuration document.
 * Entries form a singly linked list headed by tinfo->schemas.
 * The string members are filled by attr_content(), which stores pointers
 * to attribute text inside the parsed configuration tree rather than
 * copies, so they are only usable while that document is alive. */
37 struct filter_schema {
/* value of the "identifier" attribute, or 0 if absent */
39 const char *identifier;
/* value of the "stylesheet" attribute; passed to xsltParseStylesheetFile */
40 const char *stylesheet;
/* next entry in the list, 0 at the end */
41 struct filter_schema *next;
/* value of the "default" attribute, or 0 if absent */
42 const char *default_schema;
/* compiled stylesheet, or 0 when no stylesheet attribute was given */
43 xsltStylesheetPtr stylesheet_xsp;
/* Members accessed through struct filter_info pointers in this file
 * (tinfo->schemas and tinfo->reader); the opening line of the enclosing
 * declaration is not visible in this view. */
/* head of the schema list built by create_schemas() */
51 struct filter_schema *schemas;
/* streaming reader created and freed in extract_split() */
52 xmlTextReaderPtr reader;
/* Namespace URIs used by this filter.
 * ZEBRA_INDEX_NS is the schema key looked up in extract_doc() and, through
 * zebra_index_ns, the namespace that index_node() compares element
 * namespaces against.
 * ZEBRA_SCHEMA_IDENTITY_NS is the initial element-set name in
 * filter_retrieve(). */
55 #define ZEBRA_INDEX_NS "http://indexdata.dk/zebra/indexing/1"
56 #define ZEBRA_SCHEMA_IDENTITY_NS "http://indexdata.dk/zebra/identity/1"
57 static const char *zebra_index_ns = ZEBRA_INDEX_NS;
/* Build a single-quoted copy of value in ODR memory for use as a
 * stylesheet parameter.
 * The 3 extra bytes cover the two quote characters and the terminating NUL.
 * How params and name are updated is not visible in this view.
 * NOTE(review): value is inserted verbatim; a value that itself contains a
 * single quote would produce a malformed quoted string. filter_retrieve()
 * passes the requested element-set name here - confirm whether that needs
 * escaping or rejecting. */
59 static void set_param_str(const char **params, const char *name,
60 const char *value, ODR odr)
62 char *quoted = odr_malloc(odr, 3 + strlen(value));
63 sprintf(quoted, "'%s'", value);
/* Integer counterpart of set_param_str(): formats value with ZINT_FORMAT
 * between single quotes into a fixed 30-byte ODR buffer.
 * The rest of the parameter list and the update of params are not visible
 * in this view. */
71 static void set_param_int(const char **params, const char *name,
74 char *quoted = odr_malloc(odr, 30); /* 25 digits enough for 2^64 */
77 sprintf(quoted, "'" ZINT_FORMAT "'", value);
/* Allocate the per-filter state for the whole-document variant.
 * split_depth 0 makes filter_extract() use extract_full().
 * An encoding ODR memory handle is created for the filter's own
 * allocations; it is reset in filter_extract() and destroyed in
 * filter_destroy().
 * Initialisation of the remaining members and the return statement are not
 * visible in this view. */
84 static void *filter_init_xslt(Res res, RecType recType)
86 struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
89 tinfo->split_depth = 0;
90 tinfo->odr = odr_createmem(ODR_ENCODE);
/* Same state as filter_init_xslt(), but with split_depth set to 1 so that
 * filter_extract() takes the extract_split() path, which matches elements
 * at reader depth 1. */
96 static void *filter_init_xslt1(Res res, RecType recType)
98 struct filter_info *tinfo = (struct filter_info *)
99 filter_init_xslt(res, recType);
100 tinfo->split_depth = 1;
/* If attr is named name and its first child is a text node, store a pointer
 * to that text in *dst_content.
 * The pointer refers to memory inside the XML tree; nothing is copied.
 * *dst_content is left untouched by the visible code when the test fails.
 * The return values are not visible in this view. */
104 static int attr_content(struct _xmlAttr *attr, const char *name,
105 const char **dst_content)
107 if (!strcmp(attr->name, name) && attr->children &&
108 attr->children->type == XML_TEXT_NODE)
110 *dst_content = attr->children->content;
/* Release what create_schemas() built: the compiled stylesheet of each
 * list entry and the parsed configuration document.
 * The loop header, the freeing of the entries themselves and any reset of
 * tinfo members are not visible in this view. */
116 static void destroy_schemas(struct filter_info *tinfo)
118 struct filter_schema *schema = tinfo->schemas;
/* remember the successor before the current entry is disposed of */
121 struct filter_schema *schema_next = schema->next;
122 if (schema->stylesheet_xsp)
123 xsltFreeStylesheet(schema->stylesheet_xsp);
125 schema = schema_next;
/* the schema strings point into this document, so it is freed last here */
130 xmlFreeDoc(tinfo->doc);
/* Load the filter configuration file fname and build tinfo->schemas.
 * The file is parsed with xmlParseFile; its root must be an element named
 * "schemaInfo". Each child element named "schema" becomes one
 * filter_schema entry whose identifier, name, stylesheet and default
 * attributes are captured, and whose stylesheet (if given) is compiled.
 * The error branches and the returned ZEBRA_RES values are not visible in
 * this view. */
134 static ZEBRA_RES create_schemas(struct filter_info *tinfo, const char *fname)
/* keep a private copy; filter_config() compares later names against it */
137 tinfo->fname = xstrdup(fname);
138 tinfo->doc = xmlParseFile(tinfo->fname);
141 ptr = xmlDocGetRootElement(tinfo->doc);
142 if (!ptr || ptr->type != XML_ELEMENT_NODE ||
143 strcmp(ptr->name, "schemaInfo"))
145 for (ptr = ptr->children; ptr; ptr = ptr->next)
147 if (ptr->type == XML_ELEMENT_NODE &&
148 !strcmp(ptr->name, "schema"))
150 struct _xmlAttr *attr;
151 struct filter_schema *schema = xmalloc(sizeof(*schema));
153 schema->identifier = 0;
154 schema->stylesheet = 0;
155 schema->default_schema = 0;
/* push onto the head: the list ends up in reverse document order */
156 schema->next = tinfo->schemas;
157 schema->stylesheet_xsp = 0;
158 tinfo->schemas = schema;
159 for (attr = ptr->properties; attr; attr = attr->next)
/* each call only matches its own attribute name; the others are no-ops */
161 attr_content(attr, "identifier", &schema->identifier);
162 attr_content(attr, "name", &schema->name);
163 attr_content(attr, "stylesheet", &schema->stylesheet);
164 attr_content(attr, "default", &schema->default_schema);
/* stylesheet_xsp stays 0 when no stylesheet attribute was present */
166 if (schema->stylesheet)
167 schema->stylesheet_xsp =
168 xsltParseStylesheetFile(
169 (const xmlChar*) schema->stylesheet);
/* Search tinfo->schemas for an entry matching est.
 * Each entry is tested by exact string comparison against its identifier
 * and then its name; the default_schema member is also tested.
 * The declaration of est, the statements guarded by each test and the
 * fall-through return are not visible in this view. */
175 static struct filter_schema *lookup_schema(struct filter_info *tinfo,
178 struct filter_schema *schema;
179 for (schema = tinfo->schemas; schema; schema = schema->next)
183 if (schema->identifier && !strcmp(schema->identifier, est))
185 if (schema->name && !strcmp(schema->name, est))
188 if (schema->default_schema)
/* Configure the filter from the configuration file named by args.
 * "xsltfilter.xml" is substituted for args under a condition that is not
 * visible in this view. The name is compared with the file already loaded;
 * the statement guarded by that comparison is not visible - presumably an
 * early return that skips reloading. Otherwise the old schemas are
 * destroyed and rebuilt from the new file.
 * NOTE(review): the ZEBRA_RES result of create_schemas() is discarded
 * here, so a failed load is not reported by this function. */
194 static void filter_config(void *clientData, Res res, const char *args)
196 struct filter_info *tinfo = clientData;
198 args = "xsltfilter.xml";
199 if (tinfo->fname && !strcmp(args, tinfo->fname))
201 destroy_schemas(tinfo);
202 create_schemas(tinfo, args);
/* Tear down the filter state: the schema list and configuration document,
 * the streaming reader, and the ODR memory handle created in
 * filter_init_xslt().
 * Any guard around the reader free and the release of tinfo itself are not
 * visible in this view. */
205 static void filter_destroy(void *clientData)
207 struct filter_info *tinfo = clientData;
208 destroy_schemas(tinfo);
210 xmlFreeTextReader(tinfo->reader);
211 odr_destroy(tinfo->odr);
/* Read callback handed to libxml2 by extract_split() and extract_full().
 * context is the struct recExtractCtrl of the current extraction; the call
 * is forwarded to its readf function with its fh handle, and that result
 * is returned unchanged. */
215 static int ioread_ex(void *context, char *buffer, int len)
217 struct recExtractCtrl *p = context;
218 return (*p->readf)(p->fh, buffer, len);
/* Close callback passed together with ioread_ex() to xmlReaderForIO and
 * xmlReadIO in this file; its body is not visible in this view. */
221 static int ioclose_ex(void *context)
/* Emit index tokens for the text found under a node list.
 * Walks ptr and its following siblings, recursing into each node's children
 * before looking at the node itself. For text nodes the content becomes
 * the term of recWord, which is then passed to ctrl->tokenAdd.
 * The statement guarded by the node-type test is not visible - presumably
 * a continue that skips non-text nodes. */
226 static void index_field(struct filter_info *tinfo, struct recExtractCtrl *ctrl,
227 xmlNodePtr ptr, RecWord *recWord)
229 for(; ptr; ptr = ptr->next)
231 index_field(tinfo, ctrl, ptr->children, recWord);
232 if (ptr->type != XML_TEXT_NODE)
/* term_buf points at the node's text inside the tree; it is not copied */
234 recWord->term_buf = ptr->content;
235 recWord->term_len = strlen(ptr->content);
236 (*ctrl->tokenAdd)(recWord);
/* Find indexing instructions in a document tree and index their content.
 * Walks ptr and its following siblings, recursing into children first.
 * A node is tested for being an element whose namespace href equals
 * zebra_index_ns; the statement guarded by that test is not visible -
 * presumably a continue that skips other nodes. For elements named "index",
 * the "field" and "xpath" attributes are read, the field value becomes
 * recWord->attrStr, and the element's children are passed to
 * index_field().
 * xpath_str is collected but not used in the lines visible here. */
240 static void index_node(struct filter_info *tinfo, struct recExtractCtrl *ctrl,
241 xmlNodePtr ptr, RecWord *recWord)
243 for(; ptr; ptr = ptr->next)
245 index_node(tinfo, ctrl, ptr->children, recWord);
246 if (ptr->type != XML_ELEMENT_NODE || !ptr->ns ||
247 strcmp(ptr->ns->href, zebra_index_ns))
249 if (!strcmp(ptr->name, "index"))
252 const char *xpath_str = 0;
253 struct _xmlAttr *attr;
254 for (attr = ptr->properties; attr; attr = attr->next)
/* only attributes whose first child is a text node are accepted */
256 if (!strcmp(attr->name, "field")
257 && attr->children && attr->children->type == XML_TEXT_NODE)
258 field_str = attr->children->content;
259 if (!strcmp(attr->name, "xpath")
260 && attr->children && attr->children->type == XML_TEXT_NODE)
261 xpath_str = attr->children->content;
265 recWord->attrStr = field_str;
266 index_field(tinfo, ctrl, ptr->children, recWord);
/* Index one parsed document and hand it to the caller for storage.
 * The schema registered under ZEBRA_INDEX_NS is looked up; when it has a
 * compiled stylesheet, the stylesheet is applied and the result tree is
 * walked by index_node(). When p->flagShowRecords is set, the dumped XML
 * is also written to stdout.
 * The data passed to p->setStoreData is the dump of doc, i.e. the input
 * document, not the stylesheet result.
 * Returns RECCTRL_EXTRACT_OK on the visible path. The last parameter's
 * declaration, the cleanup of the documents and dump buffers, and any
 * other return paths are not visible in this view. */
272 static int extract_doc(struct filter_info *tinfo, struct recExtractCtrl *p,
276 const char *params[10];
280 struct filter_schema *schema = lookup_schema(tinfo, ZEBRA_INDEX_NS);
283 set_param_str(params, "schema", ZEBRA_INDEX_NS, tinfo->odr);
285 (*p->init)(p, &recWord);
286 recWord.reg_type = 'w';
288 if (schema && schema->stylesheet_xsp)
291 xsltApplyStylesheet(schema->stylesheet_xsp,
293 if (p->flagShowRecords)
295 xmlDocDumpMemory(resDoc, &buf_out, &len_out);
296 fwrite(buf_out, len_out, 1, stdout);
299 index_node(tinfo, p, xmlDocGetRootElement(resDoc), &recWord);
302 xmlDocDumpMemory(doc, &buf_out, &len_out);
303 if (p->flagShowRecords)
304 fwrite(buf_out, len_out, 1, stdout);
305 (*p->setStoreData)(p, buf_out, len_out);
309 return RECCTRL_EXTRACT_OK;
/* Extract the next record from a stream that holds several records.
 * A text reader over the input is (re)created with xmlReaderForIO, using
 * ioread_ex()/ioclose_ex(); the condition guarding that step and the
 * condition for the RECCTRL_EXTRACT_ERROR_GENERIC return are not visible
 * in this view.
 * The reader is then advanced node by node. A node is taken as a record
 * when split_depth is 0, or when it is an element at depth split_depth.
 * That node is expanded, deep-copied into a new document and passed to
 * extract_doc(), whose result is returned.
 * When no further node qualifies, the reader is freed and
 * RECCTRL_EXTRACT_EOF is returned. */
312 static int extract_split(struct filter_info *tinfo, struct recExtractCtrl *p)
318 xmlFreeTextReader(tinfo->reader);
319 tinfo->reader = xmlReaderForIO(ioread_ex, ioclose_ex,
326 return RECCTRL_EXTRACT_ERROR_GENERIC;
328 ret = xmlTextReaderRead(tinfo->reader);
330 int type = xmlTextReaderNodeType(tinfo->reader);
331 int depth = xmlTextReaderDepth(tinfo->reader);
332 if (tinfo->split_depth == 0 ||
333 (type == XML_READER_TYPE_ELEMENT && tinfo->split_depth == depth))
/* second argument 1 requests a copy of the node with its subtree */
335 xmlNodePtr ptr = xmlTextReaderExpand(tinfo->reader);
336 xmlNodePtr ptr2 = xmlCopyNode(ptr, 1);
337 xmlDocPtr doc = xmlNewDoc("1.0");
339 xmlDocSetRootElement(doc, ptr2);
341 return extract_doc(tinfo, p, doc);
343 ret = xmlTextReaderRead(tinfo->reader);
345 xmlFreeTextReader(tinfo->reader);
347 return RECCTRL_EXTRACT_EOF;
/* Extract a stream that holds exactly one record.
 * On the first call for a stream (p->first_record set) the whole input is
 * parsed with xmlReadIO and passed to extract_doc(). The condition for the
 * RECCTRL_EXTRACT_ERROR_GENERIC return is not visible in this view.
 * On later calls RECCTRL_EXTRACT_EOF is returned. */
350 static int extract_full(struct filter_info *tinfo, struct recExtractCtrl *p)
352 if (p->first_record) /* only one record per stream */
354 xmlDocPtr doc = xmlReadIO(ioread_ex, ioclose_ex, p /* I/O handler */,
360 return RECCTRL_EXTRACT_ERROR_GENERIC;
362 return extract_doc(tinfo, p, doc);
365 return RECCTRL_EXTRACT_EOF;
/* Extraction entry point for both filter variants.
 * The filter's ODR memory is reset first, discarding whatever
 * set_param_str() allocated for the previous record.
 * split_depth 0 selects extract_full(); any other value selects
 * extract_split(). */
368 static int filter_extract(void *clientData, struct recExtractCtrl *p)
370 struct filter_info *tinfo = clientData;
372 odr_reset(tinfo->odr);
374 if (tinfo->split_depth == 0)
375 return extract_full(tinfo, p);
378 return extract_split(tinfo, p);
/* Read callback handed to xmlReadIO by filter_retrieve().
 * context is the struct recRetrieveCtrl of the current retrieval; the call
 * is forwarded to its readf function with its fh handle, and that result
 * is returned unchanged. */
382 static int ioread_ret(void *context, char *buffer, int len)
384 struct recRetrieveCtrl *p = context;
385 return (*p->readf)(p->fh, buffer, len);
/* Close callback passed together with ioread_ret() to xmlReadIO in
 * filter_retrieve(); its body is not visible in this view. */
388 static int ioclose_ret(void *context)
/* Retrieval entry point: present a stored record in the requested schema.
 * The element-set name starts as ZEBRA_SCHEMA_IDENTITY_NS and is replaced
 * by the generic element-set name from p->comp. A comp that is not a
 * simple/generic specification is answered with
 * YAZ_BIB1_PRESENT_COMP_SPEC_PARAMETER_UNSUPP.
 * The schema for that name is looked up, and the stylesheet parameters
 * schema, filename, score and size are set from p.
 * The stored record is parsed with xmlReadIO and the schema's stylesheet is
 * applied. The serialized result is copied into p->odr memory and returned
 * through p->rec_buf and p->rec_len, with p->output_format set to
 * VAL_TEXT_XML or VAL_SUTRS.
 * Failures are reported through p->diagnostic.
 * Several conditions, the no-stylesheet branch, the cleanup of documents
 * and dump buffers, and the return statements are not visible in this
 * view. */
394 static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
396 const char *esn = ZEBRA_SCHEMA_IDENTITY_NS;
397 const char *params[10];
398 struct filter_info *tinfo = clientData;
401 struct filter_schema *schema;
405 if (p->comp->which != Z_RecordComp_simple
406 || p->comp->u.simple->which != Z_ElementSetNames_generic)
408 p->diagnostic = YAZ_BIB1_PRESENT_COMP_SPEC_PARAMETER_UNSUPP;
411 esn = p->comp->u.simple->u.generic;
413 schema = lookup_schema(tinfo, esn);
417 YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_;
/* parameters are allocated from the request's ODR, not the filter's own */
422 set_param_str(params, "schema", esn, p->odr);
424 set_param_str(params, "filename", p->fname, p->odr);
426 set_param_int(params, "score", p->score, p->odr);
427 set_param_int(params, "size", p->recordSize, p->odr);
429 doc = xmlReadIO(ioread_ret, ioclose_ret, p /* I/O handler */,
435 p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
439 if (!schema->stylesheet_xsp)
443 resDoc = xsltApplyStylesheet(schema->stylesheet_xsp,
449 p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS;
451 else if (p->input_format == VAL_NONE || p->input_format == VAL_TEXT_XML)
455 xmlDocDumpMemory(resDoc, &buf_out, &len_out);
457 p->output_format = VAL_TEXT_XML;
458 p->rec_len = len_out;
459 p->rec_buf = odr_malloc(p->odr, p->rec_len);
460 memcpy(p->rec_buf, buf_out, p->rec_len);
/* NOTE(review): the branch above selects on p->input_format, but the test
 * below reads p->output_format, which this function itself assigns. This
 * looks like it was meant to be p->input_format == VAL_SUTRS - confirm. */
464 else if (p->output_format == VAL_SUTRS)
468 xmlDocDumpMemory(resDoc, &buf_out, &len_out);
470 p->output_format = VAL_SUTRS;
471 p->rec_len = len_out;
472 p->rec_buf = odr_malloc(p->odr, p->rec_len);
473 memcpy(p->rec_buf, buf_out, p->rec_len);
479 p->diagnostic = YAZ_BIB1_RECORD_SYNTAX_UNSUPP;
/* struct recType instance for this filter; the initializer is not visible
 * in this view. Presumably it registers filter_init_xslt and the other
 * filter_* callbacks - confirm against the full file. */
485 static struct recType filter_type_xslt = {
/* Second struct recType instance; the initializer is not visible in this
 * view. Presumably it is the variant that uses filter_init_xslt1 (split
 * depth 1) - confirm against the full file. */
495 static struct recType filter_type_xslt1 = {
506 #ifdef IDZEBRA_STATIC_XSLT
514 #ifdef LIBXML_READER_ENABLED