1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2012 Index Data
3 * See the file LICENSE for details.
7 * \brief Record Conversions utility
15 #include <yaz/yaz-iconv.h>
16 #include <yaz/marcdisp.h>
17 #include <yaz/record_conv.h>
18 #include <yaz/wrbuf.h>
19 #include <yaz/xmalloc.h>
21 #include <yaz/tpath.h>
22 #include <yaz/z-opac.h>
25 #include <libxml/parser.h>
26 #include <libxml/tree.h>
27 #include <libxml/xinclude.h>
29 #include <libxslt/xsltutils.h>
30 #include <libxslt/transform.h>
33 #include <libexslt/exslt.h>
36 /** \brief The internal structure for yaz_record_conv_t */
/* NOTE(review): only the comments for the memory handle, the error buffer
   and the path member are visible in this excerpt; their declarations are
   not.  Other code in this file accesses them as p->nmem, p->wr_error and
   p->path. */
37 struct yaz_record_conv_struct {
38 /** \brief memory for configuration */
41 /** \brief conversion rules (allocated using NMEM) */
42 struct yaz_record_conv_rule *rules;
44 /** \brief pointer to last conversion rule pointer in chain */
/* yaz_record_conv_reset() points this at &rules (empty chain) and
   yaz_record_conv_configure_t() moves it to &r->next after handling rule r. */
45 struct yaz_record_conv_rule **rules_p;
47 /** \brief string buffer for error messages */
50 /** \brief path for opening files */
/* Settings of one MARC rule; these members are accessed through
   struct marc_info pointers in construct_marc() and convert_marc().
   NOTE(review): the struct's opening line and its nmem member (used as
   info->nmem / mi->nmem) are not visible in this excerpt. */
/* Character sets handed to yaz_iconv_open(output_charset, input_charset).
   construct_marc() reports an error when only one of the two ends up set. */
56 const char *input_charset;
57 const char *output_charset;
/* Set by construct_marc() to YAZ_MARC_ISO2709 or YAZ_MARC_MARCXML;
   convert_marc() selects its reader from this value. */
58 int input_format_mode;
/* One of the YAZ_MARC_* output modes chosen by construct_marc();
   passed to yaz_marc_xml() before a record is written. */
59 int output_format_mode;
62 /** \brief transformation info (rule info) */
/* One step in the conversion chain.
   NOTE(review): the info member (used elsewhere as r->info and passed to the
   type's convert/destroy callbacks) is not visible in this excerpt. */
63 struct yaz_record_conv_rule {
/* Backend callbacks for this step; yaz_record_conv_configure_t() stores a
   copy of the matching type descriptor allocated from p->nmem. */
64 struct yaz_record_conv_type *type;
/* Next rule in the chain; iteration elsewhere stops at a null pointer. */
66 struct yaz_record_conv_rule *next;
69 /** \brief reset rules+configuration */
/* Walks the current rule chain and hands every rule's info to its type's
   destroy callback, clears the error buffer, and makes the tail pointer
   refer to the head slot again.
   NOTE(review): some statements of the body are not visible in this excerpt
   (presumably the NMEM reset and clearing of p->rules) - confirm. */
70 static void yaz_record_conv_reset(yaz_record_conv_t p)
73 struct yaz_record_conv_rule *r;
74 for (r = p->rules; r; r = r->next)
/* info is owned by the backend that constructed it, so the backend frees it */
76 r->type->destroy(r->info);
78 wrbuf_rewind(p->wr_error);
/* tail pointer back at the head slot: the next rule added becomes the first */
83 p->rules_p = &p->rules;
/* Releases a converter: resets it first (which runs the destroy callback of
   every configured rule), then destroys its NMEM and its error buffer.
   NOTE(review): lines between the signature and the reset call are not
   visible here (presumably a null check on p) - confirm. */
86 void yaz_record_conv_destroy(yaz_record_conv_t p)
/* must come before nmem_destroy: reset walks p->rules, which the struct
   comment says are allocated using NMEM */
90 yaz_record_conv_reset(p);
91 nmem_destroy(p->nmem);
92 wrbuf_destroy(p->wr_error);
/* Backend constructor for an <xslt stylesheet="..."/> configuration element.
   ptr      - element to inspect; only elements named "xslt" are of interest
   path     - search path given to yaz_filepath_resolve() for the stylesheet
   wr_error - receives a message for each failure reported below
   The stylesheet file is located, parsed as XML, and a copy of it is run
   through xsltParseStylesheetDoc() before the result is freed again.
   NOTE(review): the return statements are not visible in this excerpt.
   destroy_xslt() and convert_xslt() treat the rule info as an xmlDocPtr, so
   the value returned on success is presumably xsp_doc - confirm. */
100 static void *construct_xslt(const xmlNode *ptr,
101 const char *path, WRBUF wr_error)
103 struct _xmlAttr *attr;
104 const char *stylesheet = 0;
/* strcmp is non-zero for any element other than "xslt" */
106 if (strcmp((const char *) ptr->name, "xslt"))
/* scan the attributes; "stylesheet" with a text value is the one accepted */
109 for (attr = ptr->properties; attr; attr = attr->next)
111 if (!xmlStrcmp(attr->name, BAD_CAST "stylesheet") &&
112 attr->children && attr->children->type == XML_TEXT_NODE)
113 stylesheet = (const char *) attr->children->content;
/* NOTE(review): the two adjacent literals below are concatenated without a
   separator, so the message reads "Bad attribute '<name>'Expected
   stylesheet." - a ". " between them looks intended. */
116 wrbuf_printf(wr_error, "Bad attribute '%s'"
117 "Expected stylesheet.", attr->name);
123 wrbuf_printf(wr_error, "Element <xslt>: "
124 "attribute 'stylesheet' expected");
130 xsltStylesheetPtr xsp;
/* resolve the stylesheet name against path; the result lands in fullpath */
132 if (!yaz_filepath_resolve(stylesheet, path, 0, fullpath))
134 wrbuf_printf(wr_error, "Element <xslt stylesheet=\"%s\"/>:"
135 " could not locate stylesheet '%s'",
136 stylesheet, stylesheet);
138 wrbuf_printf(wr_error, " with path '%s'", path);
142 xsp_doc = xmlParseFile(fullpath);
145 wrbuf_printf(wr_error, "Element: <xslt stylesheet=\"%s\"/>:"
146 " xml parse failed: %s", stylesheet, fullpath);
148 wrbuf_printf(wr_error, " with path '%s'", path);
151 /* need to copy this before passing it to the processor. It will
152 be encapsulated in the xsp and destroyed by xsltFreeStylesheet */
153 xsp = xsltParseStylesheetDoc(xmlCopyDoc(xsp_doc, 1));
156 wrbuf_printf(wr_error, "Element: <xslt stylesheet=\"%s\"/>:"
157 " xslt parse failed: %s", stylesheet, fullpath);
159 wrbuf_printf(wr_error, " with path '%s'", path);
160 wrbuf_printf(wr_error, " ("
165 "EXSLT not supported"
/* the compiled stylesheet is released again; convert_xslt() compiles a fresh
   copy of the stored document for every record */
173 xsltFreeStylesheet(xsp);
/* Backend convert callback for XSLT rules.
   info     - rule info; used here as the stylesheet xmlDocPtr
   record   - in: XML record text; out: rewound and overwritten with the
              serialized transformation result
   wr_error - receives a message when parsing, applying or saving fails
   NOTE(review): the return statements and the conditions guarding the error
   messages are not visible in this excerpt. */
180 static int convert_xslt(void *info, WRBUF record, WRBUF wr_error)
183 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
187 wrbuf_printf(wr_error, "xmlParseMemory failed");
/* work on a copy of the stored stylesheet document: the compiled stylesheet
   takes the copy with it when freed at the end (see the comment there) */
192 xmlDocPtr xsp_doc = xmlCopyDoc((xmlDocPtr) info, 1);
193 xsltStylesheetPtr xsp = xsltParseStylesheetDoc(xsp_doc);
/* NOTE(review): no check of xsp is visible before it is used - confirm that
   a failed stylesheet parse cannot reach this call. */
194 xmlDocPtr res = xsltApplyStylesheet(xsp, doc, 0);
197 xmlChar *out_buf = 0;
/* serialize through libxslt when available, otherwise dump with libxml2 */
200 #if HAVE_XSLTSAVERESULTTOSTRING
201 xsltSaveResultToString(&out_buf, &out_len, res, xsp);
203 xmlDocDumpFormatMemory (res, &out_buf, &out_len, 1);
207 wrbuf_printf(wr_error,
208 "xsltSaveResultToString failed");
/* replace the record contents with the transformation output */
213 wrbuf_rewind(record);
214 wrbuf_write(record, (const char *) out_buf, out_len);
222 wrbuf_printf(wr_error, "xsltApplyStylesheet failed");
226 xsltFreeStylesheet(xsp); /* frees xsp_doc too */
/* Backend destroy callback for XSLT rules; info is the stylesheet document.
   NOTE(review): the statement that releases xsp_doc is not visible in this
   excerpt (presumably xmlFreeDoc) - confirm. */
231 static void destroy_xslt(void *info)
235 xmlDocPtr xsp_doc = info;
/* Backend constructor for a <marc .../> configuration element.
   ptr      - element to inspect; only elements named "marc" are of interest
   path     - not referenced in the lines visible here
   wr_error - receives a message for each failure reported below
   Reads the inputformat, outputformat, inputcharset and outputcharset
   attributes into a struct marc_info allocated from its own NMEM, maps the
   format names to YAZ_MARC_* modes, fills in "utf-8" for a missing charset in
   the XML cases handled below, and probes the charset pair with
   yaz_iconv_open().  Every visible error path destroys info->nmem.
   NOTE(review): the return statements and the assignment of info->nmem are
   not visible in this excerpt.
   NOTE(review): the charset pointers are passed to nmem_strdup() at the end;
   if those calls are reached when no charset was configured the argument is
   a null pointer - confirm that nmem_strdup() accepts that. */
244 static void *construct_marc(const xmlNode *ptr,
245 const char *path, WRBUF wr_error)
247 NMEM nmem = nmem_create();
248 struct marc_info *info = nmem_malloc(nmem, sizeof(*info));
249 struct _xmlAttr *attr;
250 const char *input_format = 0;
251 const char *output_format = 0;
/* strcmp is non-zero for any element other than "marc" */
253 if (strcmp((const char *) ptr->name, "marc"))
260 info->input_charset = 0;
261 info->output_charset = 0;
262 info->input_format_mode = 0;
263 info->output_format_mode = 0;
/* collect the four recognised attributes; the stored pointers refer to the
   attribute text inside the XML tree */
265 for (attr = ptr->properties; attr; attr = attr->next)
267 if (!xmlStrcmp(attr->name, BAD_CAST "inputcharset") &&
268 attr->children && attr->children->type == XML_TEXT_NODE)
269 info->input_charset = (const char *) attr->children->content;
270 else if (!xmlStrcmp(attr->name, BAD_CAST "outputcharset") &&
271 attr->children && attr->children->type == XML_TEXT_NODE)
272 info->output_charset = (const char *) attr->children->content;
273 else if (!xmlStrcmp(attr->name, BAD_CAST "inputformat") &&
274 attr->children && attr->children->type == XML_TEXT_NODE)
275 input_format = (const char *) attr->children->content;
276 else if (!xmlStrcmp(attr->name, BAD_CAST "outputformat") &&
277 attr->children && attr->children->type == XML_TEXT_NODE)
278 output_format = (const char *) attr->children->content;
/* NOTE(review): the first two literals below are concatenated without a
   space, so the message reads "expected attributes'inputformat', ..." */
281 wrbuf_printf(wr_error, "Element <marc>: expected attributes"
282 "'inputformat', 'inputcharset', 'outputformat' or"
283 " 'outputcharset', got attribute '%s'",
285 nmem_destroy(info->nmem);
291 wrbuf_printf(wr_error, "Element <marc>: "
292 "attribute 'inputformat' required");
293 nmem_destroy(info->nmem);
/* map the inputformat value to a YAZ_MARC_* reader mode */
296 else if (!strcmp(input_format, "marc"))
298 info->input_format_mode = YAZ_MARC_ISO2709;
300 else if (!strcmp(input_format, "xml"))
302 info->input_format_mode = YAZ_MARC_MARCXML;
303 /** Libxml2 generates UTF-8 encoding by default .
304 So we convert from UTF-8 to outputcharset (if defined)
306 if (!info->input_charset && info->output_charset)
307 info->input_charset = "utf-8";
311 wrbuf_printf(wr_error, "Element <marc inputformat='%s'>: "
312 " Unsupported input format"
313 " defined by attribute value",
315 nmem_destroy(info->nmem);
321 wrbuf_printf(wr_error,
322 "Element <marc>: attribute 'outputformat' required");
323 nmem_destroy(info->nmem);
326 else if (!strcmp(output_format, "line"))
328 info->output_format_mode = YAZ_MARC_LINE;
330 else if (!strcmp(output_format, "marcxml"))
332 info->output_format_mode = YAZ_MARC_MARCXML;
333 if (info->input_charset && !info->output_charset)
334 info->output_charset = "utf-8";
336 else if (!strcmp(output_format, "turbomarc"))
338 info->output_format_mode = YAZ_MARC_TURBOMARC;
339 if (info->input_charset && !info->output_charset)
340 info->output_charset = "utf-8";
342 else if (!strcmp(output_format, "marc"))
344 info->output_format_mode = YAZ_MARC_ISO2709;
346 else if (!strcmp(output_format, "marcxchange"))
348 info->output_format_mode = YAZ_MARC_XCHANGE;
349 if (info->input_charset && !info->output_charset)
350 info->output_charset = "utf-8";
354 wrbuf_printf(wr_error, "Element <marc outputformat='%s'>: "
355 " Unsupported output format"
356 " defined by attribute value",
358 nmem_destroy(info->nmem);
361 if (info->input_charset && info->output_charset)
363 yaz_iconv_t cd = yaz_iconv_open(info->output_charset,
364 info->input_charset);
367 wrbuf_printf(wr_error,
368 "Element <marc inputcharset='%s' outputcharset='%s'>:"
369 " Unsupported character set mapping"
370 " defined by attribute values",
371 info->input_charset, info->output_charset);
372 nmem_destroy(info->nmem);
377 else if (info->input_charset)
379 wrbuf_printf(wr_error, "Element <marc>: "
380 "attribute 'outputcharset' missing");
381 nmem_destroy(info->nmem);
384 else if (info->output_charset)
386 wrbuf_printf(wr_error, "Element <marc>: "
387 "attribute 'inputcharset' missing");
388 nmem_destroy(info->nmem);
391 info->input_charset = nmem_strdup(info->nmem, info->input_charset);
392 info->output_charset = nmem_strdup(info->nmem, info->output_charset);
/* Backend convert callback for MARC rules.
   info     - rule info; used here as struct marc_info
   record   - in: ISO2709 or XML record; out: rewound and rewritten in the
              configured output mode
   wr_error - receives a message when reading or writing the record fails
   NOTE(review): the declaration of ret, the return statement, the conditions
   guarding the error messages and any release of cd or doc are not visible
   in this excerpt. */
396 static int convert_marc(void *info, WRBUF record, WRBUF wr_error)
398 struct marc_info *mi = info;
/* a conversion descriptor and a MARC handle are created for every call */
401 yaz_iconv_t cd = yaz_iconv_open(mi->output_charset, mi->input_charset);
402 yaz_marc_t mt = yaz_marc_create();
404 yaz_marc_xml(mt, mi->output_format_mode);
407 yaz_marc_iconv(mt, cd);
/* choose the reader from the configured input mode */
408 if (mi->input_format_mode == YAZ_MARC_ISO2709)
410 int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
/* the visible lines of construct_marc() only ever store ISO2709 or MARCXML
   as input mode; TURBOMARC is accepted here as well */
417 else if (mi->input_format_mode == YAZ_MARC_MARCXML ||
418 mi->input_format_mode == YAZ_MARC_TURBOMARC)
420 xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
424 wrbuf_printf(wr_error, "xmlParseMemory failed");
429 ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc));
431 wrbuf_printf(wr_error, "yaz_marc_read_xml failed");
437 wrbuf_printf(wr_error, "unsupported input format");
/* the input has been read into mt, so the buffer is reused for the output */
442 wrbuf_rewind(record);
443 ret = yaz_marc_write_mode(mt, record);
445 wrbuf_printf(wr_error, "yaz_marc_write_mode failed");
449 yaz_marc_destroy(mt);
/* Backend destroy callback for MARC rules: destroys the NMEM held in the
   rule's struct marc_info.  construct_marc() allocates the struct itself
   with nmem_malloc() from an NMEM it creates for the rule. */
453 static void destroy_marc(void *info)
455 struct marc_info *mi = info;
457 nmem_destroy(mi->nmem);
/* Configures converter p from the child elements of ptr.
   p     - converter; its existing rules are reset first
   ptr   - configuration element whose children describe the rules
   types - caller-supplied backend types (yaz_record_conv_configure() passes
           0).  NOTE(review): how types is linked to the built-in list is not
           visible in this excerpt.
   Each element child is offered to the backend types in list order; the
   constructed info is stored in a new rule allocated from p->nmem.
   NOTE(review): return statements, the declaration of info and several
   assignments on the new rule are not visible here. */
460 int yaz_record_conv_configure_t(yaz_record_conv_t p, const xmlNode *ptr,
461 struct yaz_record_conv_type *types)
/* the two built-in backends: bt[0] handles <marc>, bt[1] handles <xslt> */
463 struct yaz_record_conv_type bt[2];
466 bt[0].construct = construct_marc;
467 bt[0].convert = convert_marc;
468 bt[0].destroy = destroy_marc;
474 bt[1].construct = construct_xslt;
475 bt[1].convert = convert_xslt;
476 bt[1].destroy = destroy_xslt;
481 yaz_record_conv_reset(p);
483 /* parsing element children */
484 for (ptr = ptr->children; ptr; ptr = ptr->next)
486 struct yaz_record_conv_type *t;
487 struct yaz_record_conv_rule *r;
489 if (ptr->type != XML_ELEMENT_NODE)
/* try each backend in turn, starting with the built-in ones */
491 for (t = &bt[0]; t; t = t->next)
/* start each attempt with an empty error buffer so that a non-empty buffer
   afterwards can be attributed to this backend */
493 wrbuf_rewind(p->wr_error);
494 info = t->construct(ptr, p->path, p->wr_error);
496 if (info || wrbuf_len(p->wr_error))
498 /* info== 0 and no error reported , ie not handled by it */
502 if (wrbuf_len(p->wr_error) == 0)
503 wrbuf_printf(p->wr_error, "Element <backend>: expected "
504 "<marc> or <xslt> element, got <%s>"
508 r = (struct yaz_record_conv_rule *) nmem_malloc(p->nmem, sizeof(*r));
/* bt is a local array, so the rule keeps its own copy of the descriptor */
511 r->type = nmem_malloc(p->nmem, sizeof(*t));
512 memcpy(r->type, t, sizeof(*t));
/* advance the tail pointer to the new rule's next slot */
514 p->rules_p = &r->next;
/* Configures p from ptr with no caller-supplied backend types: forwards to
   yaz_record_conv_configure_t() with types set to 0 and returns its result. */
519 int yaz_record_conv_configure(yaz_record_conv_t p, const xmlNode *ptr)
521 return yaz_record_conv_configure_t(p, ptr, 0);
/* Runs a record through the rule chain starting at r.
   p                 - converter; its error buffer is rewound first
   r                 - first rule to apply (may be null: no conversion step)
   input_record_buf,
   input_record_len  - record bytes written into the working buffer
   The working buffer is output_record itself, so every rule converts it in
   place.  The loop stops at the first rule whose convert callback leaves ret
   non-zero.
   NOTE(review): the output_record parameter line, the declaration of ret and
   the return statement are not visible in this excerpt. */
524 static int yaz_record_conv_record_rule(yaz_record_conv_t p,
525 struct yaz_record_conv_rule *r,
526 const char *input_record_buf,
527 size_t input_record_len,
531 WRBUF record = output_record; /* pointer transfer */
532 wrbuf_rewind(p->wr_error);
534 wrbuf_write(record, input_record_buf, input_record_len);
535 for (; ret == 0 && r; r = r->next)
536 ret = r->type->convert(r->info, record, p->wr_error);
/* Converts an OPAC record.  The first configured rule must be a MARC rule
   (identified by its construct callback); its settings are used to decode
   the OPAC record into a temporary buffer, which is then passed on to
   yaz_record_conv_record_rule().
   NOTE(review): the output parameter line, the rule argument handed to
   yaz_record_conv_record_rule(), the return statement and the release of cd
   and res are not visible in this excerpt. */
540 int yaz_record_conv_opac_record(yaz_record_conv_t p,
541 Z_OPACRecord *input_record,
545 struct yaz_record_conv_rule *r = p->rules;
546 if (!r || r->type->construct != construct_marc)
547 ret = -1; /* no marc rule so we can't do OPAC */
550 struct marc_info *mi = r->info;
552 WRBUF res = wrbuf_alloc();
553 yaz_marc_t mt = yaz_marc_create();
554 yaz_iconv_t cd = yaz_iconv_open(mi->output_charset,
557 wrbuf_rewind(p->wr_error);
/* same handle setup as convert_marc(): output mode, then charset conversion */
558 yaz_marc_xml(mt, mi->output_format_mode);
560 yaz_marc_iconv(mt, cd);
562 yaz_opac_decode_wrbuf(mt, input_record, res);
565 ret = yaz_record_conv_record_rule(p,
567 wrbuf_buf(res), wrbuf_len(res),
570 yaz_marc_destroy(mt);
/* Converts one record by running it through all configured rules, starting
   at p->rules; returns whatever yaz_record_conv_record_rule() returns.
   NOTE(review): the output_record parameter line and the buffer argument
   line of the call are not visible in this excerpt. */
578 int yaz_record_conv_record(yaz_record_conv_t p,
579 const char *input_record_buf,
580 size_t input_record_len,
583 return yaz_record_conv_record_rule(p, p->rules,
585 input_record_len, output_record);
/* Returns the current contents of the converter's error buffer as a C
   string.  The buffer is rewound by reset, configure and each conversion
   run, so the text describes the most recent of those operations. */
588 const char *yaz_record_conv_get_error(yaz_record_conv_t p)
590 return wrbuf_cstr(p->wr_error);
/* Stores a private copy (xstrdup) of path as the converter's file search
   path; yaz_record_conv_configure_t() hands p->path to the backend
   constructors.
   NOTE(review): the lines before the assignment are not visible in this
   excerpt (presumably releasing the previous path and guarding a null
   argument) - confirm. */
593 void yaz_record_conv_set_path(yaz_record_conv_t p, const char *path)
598 p->path = xstrdup(path);
/* Allocates a new converter with xmalloc() and gives it a fresh NMEM and an
   empty error buffer.
   NOTE(review): the remaining initialisation (rules, rules_p, path) and the
   return statement are not visible in this excerpt. */
601 yaz_record_conv_t yaz_record_conv_create()
603 yaz_record_conv_t p = (yaz_record_conv_t) xmalloc(sizeof(*p));
604 p->nmem = nmem_create();
605 p->wr_error = wrbuf_alloc();
620 * c-file-style: "Stroustrup"
621 * indent-tabs-mode: nil
623 * vim: shiftwidth=4 tabstop=8 expandtab