1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2010 Index Data
3 * See the file LICENSE for details.
8 * \brief Implements MARC conversion utilities
24 #include <yaz/marcdisp.h>
25 #include <yaz/wrbuf.h>
26 #include <yaz/yaz-util.h>
27 #include <yaz/nmem_xml.h>
28 #include <yaz/snprintf.h>
31 #include <libxml/parser.h>
32 #include <libxml/tree.h>
// State of collection wrapping for XML output; this is the type of the
// enable_collection field of the handle. The enumerators are not visible in
// this excerpt; no_collection, collection_first and collection_second are
// the values used elsewhere in this file.
35 enum yaz_collection_state {
41 /** \brief node types for yaz_marc_node */
// Only YAZ_MARC_CONTROLFIELD is visible in this excerpt; YAZ_MARC_DATAFIELD,
// YAZ_MARC_COMMENT and YAZ_MARC_LEADER are also assigned to node->which
// elsewhere in this file.
42 enum YAZ_MARC_NODE_TYPE
45 YAZ_MARC_CONTROLFIELD,
50 /** \brief represents a data field */
// The tag and indicator members used elsewhere in this file are not visible
// in this excerpt.
51 struct yaz_marc_datafield {
// Head of the singly linked list of subfields belonging to this field.
54 struct yaz_marc_subfield *subfields;
57 /** \brief represents a control field */
// Members are not visible in this excerpt; tag and data are the members
// assigned by the add_controlfield functions in this file.
58 struct yaz_marc_controlfield {
63 /** \brief a comment node */
// Members are not visible in this excerpt.
// NOTE(review): comment nodes in this file store their text directly in
// u.comment (see yaz_marc_add_comment); whether this struct is actually used
// cannot be told from the visible lines - confirm.
64 struct yaz_marc_comment {
68 /** \brief MARC node */
// One element of a record: leader, control field, data field or comment.
69 struct yaz_marc_node {
// Discriminator telling which payload member is in use.
70 enum YAZ_MARC_NODE_TYPE which;
// Payload variants; the code in this file reaches them as n->u.datafield and
// n->u.controlfield (the enclosing declaration lines are not visible here).
72 struct yaz_marc_datafield datafield;
73 struct yaz_marc_controlfield controlfield;
// Next node of the record, or null at the end of the list.
77 struct yaz_marc_node *next;
80 /** \brief represents a subfield */
// The code_data member (subfield code immediately followed by its data, as
// used by the writers in this file) is not visible in this excerpt.
81 struct yaz_marc_subfield {
// Next subfield of the same data field, or null at the end of the list.
83 struct yaz_marc_subfield *next;
86 /** \brief the internals of a yaz_marc_t handle */
// The struct header and several members (for example output_format, nmem,
// m_wr, iconv_cd and leader_spec, all used below) are not visible in this
// excerpt.
// Non-zero selects the libxml2 based path in yaz_marc_write_marcxml_ns.
93 int write_using_libxml2;
// Tracks whether and how records are wrapped in a collection element.
94 enum yaz_collection_state enable_collection;
// Head of the list of nodes making up the current record.
99 struct yaz_marc_node *nodes;
// Points at the link that follows the most recently added node (or at nodes
// after a reset); presumably the place where the next node is attached.
100 struct yaz_marc_node **nodes_pp;
// Points at the link where the next subfield of the current data field is
// attached (see yaz_marc_add_subfield).
101 struct yaz_marc_subfield **subfield_pp;
// Allocates a new handle with xmalloc and sets the defaults visible below:
// line output format, libxml2 writing disabled, no collection wrapping, a
// fresh WRBUF, a space followed by a dollar sign as subfield prefix, a
// newline as line terminator and a new NMEM.
// Lines not visible in this excerpt may initialize further members and
// return the handle.
104 yaz_marc_t yaz_marc_create(void)
106 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
107 mt->output_format = YAZ_MARC_LINE;
109 mt->write_using_libxml2 = 0;
110 mt->enable_collection = no_collection;
111 mt->m_wr = wrbuf_alloc();
114 strcpy(mt->subfield_str, " $");
115 strcpy(mt->endline_str, "\n");
117 mt->nmem = nmem_create();
// Releases the resources owned by the handle: the NMEM, the internal WRBUF
// and the leader specification string.
// Any null check on mt and the release of the handle itself are on lines not
// visible in this excerpt.
122 void yaz_marc_destroy(yaz_marc_t mt)
126 nmem_destroy(mt->nmem);
127 wrbuf_destroy(mt->m_wr);
128 xfree(mt->leader_spec);
// Accessor returning an NMEM for the handle. The body is not visible in this
// excerpt; presumably it returns mt->nmem - confirm.
132 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
// Calls wrbuf_iconv_reset on wr with the iconv descriptor of the handle.
// The writers call this after emitting converted field data. Any guard
// around the call is not visible in this excerpt.
137 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
139 wrbuf_iconv_reset(wr, mt->iconv_cd);
// Forward declaration; the definition appears near the end of this file.
// The rest of the parameter list is on a line not visible in this excerpt.
142 static int marc_exec_leader(const char *leader_spec, char *leader,
// Allocates a new node from the NMEM of the handle and moves nodes_pp to the
// next link of that node, so a later node is appended after it.
// Attaching the node to the list and returning it (callers use the result)
// happen on lines not visible in this excerpt.
146 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
148 struct yaz_marc_node *n = (struct yaz_marc_node *)
149 nmem_malloc(mt->nmem, sizeof(*n));
152 mt->nodes_pp = &n->next;
// Appends a control field node whose tag and data are obtained from two
// libxml2 nodes through nmem_text_node_cdata, using the NMEM of the handle.
// \param mt       handle
// \param ptr_tag  node supplying the tag text
// \param ptr_data node supplying the field data text
157 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
158 const xmlNode *ptr_data)
160 struct yaz_marc_node *n = yaz_marc_add_node(mt);
161 n->which = YAZ_MARC_CONTROLFIELD;
162 n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
163 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
// Appends a control field node with a caller supplied tag string and data
// obtained from a libxml2 node through nmem_text_node_cdata.
// \param mt       handle
// \param tag      tag string; the pointer is stored as is, not duplicated,
//                 so the node refers to memory provided by the caller
// \param ptr_data node supplying the field data text
166 void yaz_marc_add_controlfield_turbo_xml(yaz_marc_t mt, char *tag,
167 const xmlNode *ptr_data)
169 struct yaz_marc_node *n = yaz_marc_add_node(mt);
170 n->which = YAZ_MARC_CONTROLFIELD;
171 n->u.controlfield.tag = tag;
172 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
// Appends a comment node that holds an nmem_strdup copy of the given text,
// allocated from the NMEM of the handle.
// \param mt      handle
// \param comment text of the comment
178 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
180 struct yaz_marc_node *n = yaz_marc_add_node(mt);
181 n->which = YAZ_MARC_COMMENT;
182 n->u.comment = nmem_strdup(mt->nmem, comment);
// printf style helper: formats the arguments into a local buffer with
// yaz_vsnprintf and appends the result as a comment node.
// The buffer declaration (and thus its size) and the va_list handling are on
// lines not visible in this excerpt.
185 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
191 yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
192 yaz_marc_add_comment(mt, buf);
// Accessor for the debug setting of the handle. The body is not visible in
// this excerpt; presumably it returns the level stored by yaz_marc_debug -
// confirm.
196 int yaz_marc_get_debug(yaz_marc_t mt)
// Appends a leader node holding an nmem_strdupn copy of leader_len bytes of
// the given leader, then applies the leader specification stored in the
// handle to that copy through marc_exec_leader.
// \param mt         handle
// \param leader     leader bytes
// \param leader_len number of bytes to copy
201 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
203 struct yaz_marc_node *n = yaz_marc_add_node(mt);
204 n->which = YAZ_MARC_LEADER;
205 n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
206 marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
// Appends a control field node with copies of the tag and of data_len bytes
// of data, both allocated from the NMEM of the handle.
// \param mt       handle
// \param tag      tag string (copied)
// \param data     field data (data_len bytes are copied)
// \param data_len number of data bytes
209 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
210 const char *data, size_t data_len)
212 struct yaz_marc_node *n = yaz_marc_add_node(mt);
213 n->which = YAZ_MARC_CONTROLFIELD;
214 n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
215 n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
// The remaining lines build a comment node with a hex dump of at most the
// first 16 data bytes. The condition enabling this (presumably the debug
// setting), the declaration of msg and the guard on the trailing dots are
// not visible in this excerpt.
221 sprintf(msg, "controlfield:");
222 for (i = 0; i < 16 && i < data_len; i++)
223 sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
225 sprintf(msg + strlen(msg), " ..");
226 yaz_marc_add_comment(mt, msg);
// Appends a data field node with copies of the tag and of indicator_len
// bytes of indicator data, and an empty subfield list. Subsequent calls to
// yaz_marc_add_subfield attach subfields to this field.
// \param mt            handle
// \param tag           tag string (copied)
// \param indicator     indicator bytes (indicator_len bytes are copied)
// \param indicator_len number of indicator bytes
230 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
231 const char *indicator, size_t indicator_len)
233 struct yaz_marc_node *n = yaz_marc_add_node(mt);
234 n->which = YAZ_MARC_DATAFIELD;
235 n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
236 n->u.datafield.indicator =
237 nmem_strdupn(mt->nmem, indicator, indicator_len);
238 n->u.datafield.subfields = 0;
240 /* make subfield_pp the current (last one) */
241 mt->subfield_pp = &n->u.datafield.subfields;
244 // Magic function: appends an attribute value to the element name if it consists of plain characters.
245 // If not, and if the attribute name is not null, it appends an attribute with that name holding the value.
246 // If the attribute name is null it returns a non-zero value, meaning it could not handle the value.
// \param mt             handle supplying the iconv descriptor
// \param buffer         WRBUF receiving the output
// \param attribute_name attribute name, or null
// \param code_data      value bytes
// \param code_len       number of value bytes
248 int element_name_append_attribute_value(yaz_marc_t mt, WRBUF buffer, const char *attribute_name, char *code_data, size_t code_len)
250 // TODO Map special codes to something possible for XML ELEMENT names
// Scan for any byte outside 0-9, a-z and A-Z. The lines that declare and set
// encode are not visible in this excerpt; presumably it becomes non-zero when
// such a byte is found - confirm.
254 for (index = 0; index < code_len; index++)
256 if (!((code_data[index] >= '0' && code_data[index] <= '9') ||
257 (code_data[index] >= 'a' && code_data[index] <= 'z') ||
258 (code_data[index] >= 'A' && code_data[index] <= 'Z')))
// Open the attribute only when the value needs encoding and a name is given.
263 if (encode && attribute_name)
264 wrbuf_printf(buffer, " %s=\"", attribute_name);
// The value itself is written in both the plain and the attribute case.
266 if (!encode || attribute_name)
267 wrbuf_iconv_write_cdata(buffer, mt->iconv_cd, code_data, code_len);
271 if (encode && attribute_name)
272 wrbuf_printf(buffer, "\""); // return error if we couldn't handle it.
// Appends a data field node whose tag is obtained from a libxml2 node
// through nmem_text_node_cdata; the indicator bytes are copied from the
// given buffer. The subfield list starts out empty.
// \param mt            handle
// \param ptr_tag       node supplying the tag text
// \param indicator     indicator bytes (indicator_len bytes are copied)
// \param indicator_len number of indicator bytes
277 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
278 const char *indicator, size_t indicator_len)
280 struct yaz_marc_node *n = yaz_marc_add_node(mt);
281 n->which = YAZ_MARC_DATAFIELD;
282 n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
283 n->u.datafield.indicator =
284 nmem_strdupn(mt->nmem, indicator, indicator_len);
285 n->u.datafield.subfields = 0;
287 /* make subfield_pp the current (last one) */
288 mt->subfield_pp = &n->u.datafield.subfields;
// Appends a data field node using caller supplied tag and indicator strings.
// Both pointers are stored as is, not duplicated, so the node refers to
// memory provided by the caller. The subfield list starts out empty.
// \param mt         handle
// \param tag_value  tag string
// \param indicators indicator string
291 void yaz_marc_add_datafield_turbo_xml(yaz_marc_t mt, char *tag_value, char *indicators)
293 struct yaz_marc_node *n = yaz_marc_add_node(mt);
294 n->which = YAZ_MARC_DATAFIELD;
295 n->u.datafield.tag = tag_value;
296 n->u.datafield.indicator = indicators;
297 n->u.datafield.subfields = 0;
299 // make subfield_pp the current (last one)
300 mt->subfield_pp = &n->u.datafield.subfields;
// Replaces the indicator pointer of a data field node. The pointer is stored
// as is, not duplicated. No check that n is a data field node is visible in
// this excerpt.
303 void yaz_marc_datafield_set_indicators(struct yaz_marc_node *n, char *indicator)
305 n->u.datafield.indicator = indicator;
// Appends a subfield to the current data field, i.e. at the link that
// subfield_pp points to.
// \param mt            handle
// \param code_data     subfield code immediately followed by its data
// \param code_data_len number of bytes in code_data
310 void yaz_marc_add_subfield(yaz_marc_t mt,
311 const char *code_data, size_t code_data_len)
// Builds a comment node with a hex dump of at most the first 16 bytes,
// followed by dots when the input is longer. The condition enabling this
// (presumably the debug setting) and the declaration of msg are not visible
// in this excerpt.
318 sprintf(msg, "subfield:");
319 for (i = 0; i < 16 && i < code_data_len; i++)
320 sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
321 if (i < code_data_len)
322 sprintf(msg + strlen(msg), " ..");
323 yaz_marc_add_comment(mt, msg);
// The subfield and its copy of code_data are allocated from the NMEM of the
// handle. Initialization of n->next and any check that subfield_pp is set
// are not visible in this excerpt.
328 struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
329 nmem_malloc(mt->nmem, sizeof(*n));
330 n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
332 /* mark subfield_pp to point to this one, so we append here next */
333 *mt->subfield_pp = n;
334 mt->subfield_pp = &n->next;
// Parses the numeric parts of a 24 byte leader into the output parameters
// and appends the leader as a node.
// Each value is read with atoi_n_check from a fixed leader offset. For every
// failed check a message is visible below, followed by an assignment that
// appears to be the fallback value (2, 2, 4, 5 and 0 respectively; none is
// visible for the base address). The statements that emit the messages are
// on lines not visible in this excerpt.
// \param mt        handle
// \param leader_c  leader; 24 bytes are copied
// The remaining parameters receive the parsed values; one parameter line
// (base_address, used below) is not visible in this excerpt.
338 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
339 int *indicator_length,
340 int *identifier_length,
342 int *length_data_entry,
343 int *length_starting,
344 int *length_implementation)
348 memcpy(leader, leader_c, 24);
350 if (!atoi_n_check(leader+10, 1, indicator_length))
353 "Indicator length at offset 10 should hold a digit."
356 *indicator_length = 2;
358 if (!atoi_n_check(leader+11, 1, identifier_length))
361 "Identifier length at offset 11 should hold a digit."
364 *identifier_length = 2;
366 if (!atoi_n_check(leader+12, 5, base_address))
369 "Base address at offsets 12..16 should hold a number."
373 if (!atoi_n_check(leader+20, 1, length_data_entry))
376 "Length data entry at offset 20 should hold a digit."
378 *length_data_entry = 4;
381 if (!atoi_n_check(leader+21, 1, length_starting))
384 "Length starting at offset 21 should hold a digit."
386 *length_starting = 5;
389 if (!atoi_n_check(leader+22, 1, length_implementation))
392 "Length implementation at offset 22 should hold a digit."
394 *length_implementation = 0;
// Report the resulting values as comment nodes; the condition guarding this
// (presumably the debug setting) is not visible in this excerpt.
400 yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
401 yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
402 yaz_marc_cprintf(mt, "Base address %5d", *base_address);
403 yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
404 yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
405 yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
407 yaz_marc_add_leader(mt, leader, 24);
// Sets the string written before each subfield by yaz_marc_write_line.
// At most sizeof(subfield_str)-1 bytes are copied and the last byte of the
// buffer is forced to NUL, so longer input is silently truncated.
410 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
412 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
413 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
// Sets the string written at the end of each field by yaz_marc_write_line.
// At most sizeof(endline_str)-1 bytes are copied and the last byte of the
// buffer is forced to NUL, so longer input is silently truncated.
416 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
418 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
419 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
422 /* try to guess how many bytes the identifier really is! */
// Feeds prefixes of 1 to 4 bytes of buf to yaz_iconv and returns the first
// prefix length that converts without an error result. Returns 1 when no
// prefix converts. The final return of 1 is presumably the path taken when
// the handle has no iconv descriptor - the guarding condition is not visible
// in this excerpt.
423 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
428 for (i = 1; i<5; i++)
431 size_t outbytesleft = sizeof(outbuf);
433 const char *inp = buf;
435 size_t inbytesleft = i;
436 size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
437 &outp, &outbytesleft);
438 if (r != (size_t) (-1))
439 return i; /* got a complete sequence */
441 return 1; /* giving up */
443 return 1; /* we don't know */
// Discards the current record: resets the NMEM that backs all nodes and
// points nodes_pp back at the list head. Further resets (for example of the
// list head itself) may be on lines not visible in this excerpt.
446 void yaz_marc_reset(yaz_marc_t mt)
448 nmem_reset(mt->nmem);
450 mt->nodes_pp = &mt->nodes;
// Writer used by the check output mode: of the visible cases only comment
// nodes produce output, each followed by a newline.
// The leader node is located first and its identifier length digit (offset
// 11) is parsed; the handling of a missing leader or a failed parse, other
// node types and the return value are not visible in this excerpt.
454 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
456 struct yaz_marc_node *n;
457 int identifier_length;
458 const char *leader = 0;
460 for (n = mt->nodes; n; n = n->next)
461 if (n->which == YAZ_MARC_LEADER)
463 leader = n->u.leader;
469 if (!atoi_n_check(leader+11, 1, &identifier_length))
472 for (n = mt->nodes; n; n = n->next)
476 case YAZ_MARC_COMMENT:
477 wrbuf_iconv_write(wr, mt->iconv_cd,
478 n->u.comment, strlen(n->u.comment));
479 wrbuf_puts(wr, "\n");
// Returns the number of leading bytes of a subfield that form its code.
// \param mt                handle supplying the iconv descriptor
// \param data              subfield code followed by data
// \param identifier_length identifier length taken from the leader
488 static size_t get_subfield_len(yaz_marc_t mt, const char *data,
489 int identifier_length)
491 /* if identifier length is 2 (most MARCs) or less (probably an error),
492 the code is a single character .. However we've
493 seen multibyte codes, so see how big it really is */
494 if (identifier_length > 2)
495 return identifier_length - 1;
497 return cdata_one_character(mt, data);
// Writes the record in line format to wr: one line per field, with the
// configured subfield prefix before each subfield and the configured end of
// line string after each field.
// The leader node is located first and its identifier length digit (offset
// 11) is parsed; handling of a missing leader or failed parse and the return
// value are not visible in this excerpt.
500 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
502 struct yaz_marc_node *n;
503 int identifier_length;
504 const char *leader = 0;
506 for (n = mt->nodes; n; n = n->next)
507 if (n->which == YAZ_MARC_LEADER)
509 leader = n->u.leader;
515 if (!atoi_n_check(leader+11, 1, &identifier_length))
518 for (n = mt->nodes; n; n = n->next)
520 struct yaz_marc_subfield *s;
523 case YAZ_MARC_DATAFIELD:
// Tag and indicators are written without character set conversion.
524 wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
525 n->u.datafield.indicator);
526 for (s = n->u.datafield.subfields; s; s = s->next)
// Split code_data into the code (using_code_len bytes) and the data.
528 size_t using_code_len = get_subfield_len(mt, s->code_data,
531 wrbuf_puts (wr, mt->subfield_str);
532 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
534 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
535 wrbuf_iconv_puts(wr, mt->iconv_cd,
536 s->code_data + using_code_len);
537 marc_iconv_reset(mt, wr);
539 wrbuf_puts (wr, mt->endline_str);
541 case YAZ_MARC_CONTROLFIELD:
542 wrbuf_printf(wr, "%s", n->u.controlfield.tag);
543 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
544 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
545 marc_iconv_reset(mt, wr);
546 wrbuf_puts (wr, mt->endline_str);
548 case YAZ_MARC_COMMENT:
// The comment text is followed by a closing parenthesis and newline; the
// line writing the opening part is not visible in this excerpt.
550 wrbuf_iconv_write(wr, mt->iconv_cd,
551 n->u.comment, strlen(n->u.comment));
552 marc_iconv_reset(mt, wr);
553 wrbuf_puts(wr, ")\n");
555 case YAZ_MARC_LEADER:
556 wrbuf_printf(wr, "%s\n", n->u.leader);
// Trailing newline written after the node loop (placement assumed from the
// line order; the closing braces are not visible in this excerpt).
559 wrbuf_puts(wr, "\n");
// Writes the closing collection tag when a collection has been opened, i.e.
// when enable_collection has reached collection_second, and the output
// format is MARCXML, Turbo MARCXML or MarcXchange.
// The return value is on lines not visible in this excerpt.
563 int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
565 if (mt->enable_collection == collection_second)
567 switch(mt->output_format)
569 case YAZ_MARC_MARCXML:
570 case YAZ_MARC_TMARCXML:
571 wrbuf_printf(wr, "</collection>\n");
573 case YAZ_MARC_XCHANGE:
574 wrbuf_printf(wr, "</collection>\n");
// Requests collection wrapping: sets the state to collection_first, which
// makes the XML writers in this file emit the opening collection tag before
// the next record.
581 void yaz_marc_enable_collection(yaz_marc_t mt)
583 mt->enable_collection = collection_first;
// Dispatches to the writer matching the output format of the handle and
// returns the result of that writer.
// The case labels in front of the yaz_marc_write_line and
// yaz_marc_write_check calls are not visible in this excerpt.
586 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
588 switch(mt->output_format)
591 return yaz_marc_write_line(mt, wr);
// MARCXML and Turbo MARCXML share one entry point, which picks the
// namespace from the output format.
592 case YAZ_MARC_MARCXML:
593 case YAZ_MARC_TMARCXML:
594 return yaz_marc_write_marcxml(mt, wr);
595 case YAZ_MARC_XCHANGE:
596 return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
597 case YAZ_MARC_ISO2709:
598 return yaz_marc_write_iso2709(mt, wr);
600 return yaz_marc_write_check(mt, wr);
// Element and attribute names used by the XML writers, indexed by the turbo
// flag: index 0 holds the MARCXML name, index 1 the short Turbo MARCXML name.
// NOTE(review): these arrays are not static, so they have external linkage;
// whether other files rely on that is not visible here - confirm before
// narrowing their scope.
605 const char *collection_name[2] = { "collection", "collection"};
606 const char *record_name[2] = { "record", "r"};
607 const char *leader_name[2] = { "leader", "l"};
608 const char *controlfield_name[2]= { "controlfield", "c"};
609 const char *datafield_name[2] = { "datafield", "d"};
610 const char *indicator_name[2] = { "ind", "i"};
611 const char *subfield_name[2] = { "subfield", "s"};
614 /** \brief common MARC XML/Xchange writer
616 \param wr WRBUF output
617 \param ns XMLNS for the elements
618 \param format record format (e.g. "MARC21")
619 \param type record type (e.g. "Bibliographic")
// WRBUF based writer covering both MARCXML and Turbo MARCXML; element names
// come from the name tables indexed by the turbo flag.
// Many conditions in this function (the turbo and non-turbo branches, the
// tests for format and type, missing leader handling) and the return value
// are on lines not visible in this excerpt.
621 static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr,
626 struct yaz_marc_node *n;
627 int identifier_length;
628 const char *leader = 0;
630 int turbo = yaz_marc_get_write_format(mt) == YAZ_MARC_TMARCXML;
632 for (n = mt->nodes; n; n = n->next)
633 if (n->which == YAZ_MARC_LEADER)
635 leader = n->u.leader;
641 if (!atoi_n_check(leader+11, 1, &identifier_length))
// With collection wrapping the namespace is declared once on the collection
// element (written for the first record only); otherwise it is declared on
// each record element.
644 if (mt->enable_collection != no_collection)
646 if (mt->enable_collection == collection_first)
648 wrbuf_printf(wr, "<%s xmlns=\"%s\">\n", collection_name[turbo], ns);
649 mt->enable_collection = collection_second;
651 wrbuf_printf(wr, "<%s", record_name[turbo]);
655 wrbuf_printf(wr, "<%s xmlns=\"%s\"", record_name[turbo], ns);
// format and type are limited to 80 characters by the precision.
658 wrbuf_printf(wr, " format=\"%.80s\"", format);
660 wrbuf_printf(wr, " type=\"%.80s\"", type);
661 wrbuf_printf(wr, ">\n");
662 for (n = mt->nodes; n; n = n->next)
664 struct yaz_marc_subfield *s;
668 case YAZ_MARC_DATAFIELD:
// The tag is written right after the element name; presumably the tag
// attribute wrapper is emitted only in non-turbo mode, so that in turbo mode
// the tag becomes part of the element name - confirm against the full file.
670 wrbuf_printf(wr, " <%s", datafield_name[turbo]);
672 wrbuf_printf(wr, " tag=\"");
673 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
674 strlen(n->u.datafield.tag));
676 wrbuf_printf(wr, "\"");
677 if (n->u.datafield.indicator)
// One attribute per indicator byte, numbered from 1.
680 for (i = 0; n->u.datafield.indicator[i]; i++)
682 wrbuf_printf(wr, " %s%d=\"", indicator_name[turbo], i+1);
683 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
684 n->u.datafield.indicator+i, 1);
685 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
688 wrbuf_printf(wr, ">\n");
689 for (s = n->u.datafield.subfields; s; s = s->next)
691 size_t using_code_len = get_subfield_len(mt, s->code_data,
693 wrbuf_printf(wr, " <%s", subfield_name[turbo]);
// Two ways of emitting the subfield code follow: as a code attribute, or via
// element_name_append_attribute_value. The condition choosing between them
// is not visible in this excerpt.
696 wrbuf_printf(wr, " code=\"");
697 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
698 s->code_data, using_code_len);
699 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
703 element_name_append_attribute_value(mt, wr, "code", s->code_data, using_code_len);
706 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
707 s->code_data + using_code_len,
708 strlen(s->code_data + using_code_len));
709 marc_iconv_reset(mt, wr);
710 wrbuf_printf(wr, "</%s", subfield_name[turbo]);
712 element_name_append_attribute_value(mt, wr, 0, s->code_data, using_code_len);
713 wrbuf_puts(wr, ">\n");
715 wrbuf_printf(wr, " </%s", datafield_name[turbo]);
718 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
719 strlen(n->u.datafield.tag));
// NOTE(review): the format string on the next line has no conversion, so the
// datafield_name argument is unused.
720 wrbuf_printf(wr, ">\n", datafield_name[turbo]);
722 case YAZ_MARC_CONTROLFIELD:
723 wrbuf_printf(wr, " <%s", controlfield_name[turbo]);
726 wrbuf_printf(wr, " tag=\"");
727 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
728 strlen(n->u.controlfield.tag));
729 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
733 //TODO convert special
734 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
735 strlen(n->u.controlfield.tag));
736 wrbuf_iconv_puts(wr, mt->iconv_cd, ">");
738 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
739 n->u.controlfield.data,
740 strlen(n->u.controlfield.data));
741 marc_iconv_reset(mt, wr);
742 wrbuf_printf(wr, "</%s", controlfield_name[turbo]);
743 //TODO convert special
745 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
746 strlen(n->u.controlfield.tag));
747 wrbuf_puts(wr, ">\n");
749 case YAZ_MARC_COMMENT:
// The comment text is written as is, without escaping or conversion.
750 wrbuf_printf(wr, "<!-- ");
751 wrbuf_puts(wr, n->u.comment);
752 wrbuf_printf(wr, " -->\n");
754 case YAZ_MARC_LEADER:
755 wrbuf_printf(wr, " <%s>", leader_name[turbo]);
756 wrbuf_iconv_write_cdata(wr,
757 0 , /* no charset conversion for leader */
758 n->u.leader, strlen(n->u.leader));
759 wrbuf_printf(wr, "</%s>\n", leader_name[turbo]);
762 wrbuf_printf(wr, "</%s>\n", record_name[turbo]);
// WRBUF based writer producing the classic MARCXML element names (record,
// leader, controlfield, datafield, subfield) with fixed strings.
// No caller of this function is visible in this excerpt.
// The remaining parameters (ns, format and type are used below), the tests
// for format and type, missing leader handling and the return value are on
// lines not visible in this excerpt.
766 static int yaz_marc_write_marcxml_ns2(yaz_marc_t mt, WRBUF wr,
771 struct yaz_marc_node *n;
772 int identifier_length;
773 const char *leader = 0;
775 for (n = mt->nodes; n; n = n->next)
776 if (n->which == YAZ_MARC_LEADER)
778 leader = n->u.leader;
784 if (!atoi_n_check(leader+11, 1, &identifier_length))
// With collection wrapping the namespace is declared once on the collection
// element; otherwise on each record element.
787 if (mt->enable_collection != no_collection)
789 if (mt->enable_collection == collection_first)
790 wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
791 mt->enable_collection = collection_second;
792 wrbuf_printf(wr, "<record");
796 wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
// format and type are limited to 80 characters by the precision.
799 wrbuf_printf(wr, " format=\"%.80s\"", format);
801 wrbuf_printf(wr, " type=\"%.80s\"", type);
802 wrbuf_printf(wr, ">\n");
803 for (n = mt->nodes; n; n = n->next)
805 struct yaz_marc_subfield *s;
809 case YAZ_MARC_DATAFIELD:
810 wrbuf_printf(wr, " <datafield tag=\"");
811 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
812 strlen(n->u.datafield.tag));
813 wrbuf_printf(wr, "\"");
814 if (n->u.datafield.indicator)
// One indN attribute per indicator byte, numbered from 1.
817 for (i = 0; n->u.datafield.indicator[i]; i++)
819 wrbuf_printf(wr, " ind%d=\"", i+1);
820 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
821 n->u.datafield.indicator+i, 1);
822 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
825 wrbuf_printf(wr, ">\n");
826 for (s = n->u.datafield.subfields; s; s = s->next)
// Split code_data into the code (using_code_len bytes) and the data.
828 size_t using_code_len = get_subfield_len(mt, s->code_data,
830 wrbuf_iconv_puts(wr, mt->iconv_cd, " <subfield code=\"");
831 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
832 s->code_data, using_code_len);
833 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
834 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
835 s->code_data + using_code_len,
836 strlen(s->code_data + using_code_len));
837 marc_iconv_reset(mt, wr);
838 wrbuf_iconv_puts(wr, mt->iconv_cd, "</subfield>");
839 wrbuf_puts(wr, "\n");
841 wrbuf_printf(wr, " </datafield>\n");
843 case YAZ_MARC_CONTROLFIELD:
844 wrbuf_printf(wr, " <controlfield tag=\"");
845 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
846 strlen(n->u.controlfield.tag));
847 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
848 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
849 n->u.controlfield.data,
850 strlen(n->u.controlfield.data));
852 marc_iconv_reset(mt, wr);
853 wrbuf_iconv_puts(wr, mt->iconv_cd, "</controlfield>");
854 wrbuf_puts(wr, "\n");
856 case YAZ_MARC_COMMENT:
// The comment text is written as is, without escaping or conversion.
857 wrbuf_printf(wr, "<!-- ");
858 wrbuf_puts(wr, n->u.comment);
859 wrbuf_printf(wr, " -->\n");
861 case YAZ_MARC_LEADER:
862 wrbuf_printf(wr, " <leader>");
863 wrbuf_iconv_write_cdata(wr,
864 0 /* no charset conversion for leader */,
865 n->u.leader, strlen(n->u.leader));
866 wrbuf_printf(wr, "</leader>\n");
869 wrbuf_puts(wr, "</record>\n");
// Chooses between the two XML back ends. When write_using_libxml2 is set,
// the record is built as a libxml2 tree (MARCXML or Turbo MARCXML builder),
// placed in a new document, dumped to memory and appended to wr. Otherwise
// the WRBUF based writer yaz_marc_write_marcxml_ns1 is used.
// Result checks and the release of the document and dump buffer are on lines
// not visible in this excerpt.
874 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
879 if (mt->write_using_libxml2)
885 if (yaz_marc_get_write_format(mt) == YAZ_MARC_MARCXML)
886 ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
887 else // Check for Turbo XML
888 ret = yaz_marc_write_turbo_xml(mt, &root_ptr, ns, format, type);
892 xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
895 xmlDocSetRootElement(doc, root_ptr);
896 xmlDocDumpMemory(doc, &buf_out, &len_out);
898 wrbuf_write(wr, (const char *) buf_out, len_out);
909 return yaz_marc_write_marcxml_ns1(mt, wr, ns, format, type);
// Writes the record as MARCXML, or as Turbo MARCXML when that output format
// is selected; only the namespace passed on differs.
// When no leader specification is set, leader position 9 is changed to the
// letter a before writing.
// NOTE(review): name_space is declared after a statement, which requires C99
// or later, and it is a non-const pointer to string literals.
912 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
914 /* set leader 09 to 'a' for UNICODE */
915 /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
916 if (!mt->leader_spec)
917 yaz_marc_modify_leader(mt, 9, "a");
918 char *name_space = "http://www.loc.gov/MARC21/slim";
919 if (mt->output_format == YAZ_MARC_TMARCXML)
920 name_space = "http://www.indexdata.com/MARC21/turboxml";
921 return yaz_marc_write_marcxml_ns(mt, wr, name_space,
// Writes the record as MarcXchange by calling the common XML writer with the
// MarcXchange namespace. The remaining parameters and arguments (callers in
// this file pass a format and a type) are on lines not visible in this
// excerpt.
925 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
929 return yaz_marc_write_marcxml_ns(mt, wr,
930 "info:lc/xmlns/marcxchange-v1",
// Adds one data field, with its indicators and subfields, as a child of
// record_ptr in a libxml2 tree.
// Two element naming schemes are visible: a datafield element with a tag
// attribute, and an element whose name is built in a local buffer from the
// tag. The condition choosing between them (presumably the turbo flag) is
// not visible in this excerpt.
// \param mt                handle
// \param n                 data field node to convert
// \param record_ptr        parent element
// \param ns_record         namespace for the new elements
// \param wr_cdata          scratch WRBUF for converted text
// \param identifier_length identifier length from the leader
936 void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n, xmlNode *record_ptr, xmlNsPtr ns_record, WRBUF wr_cdata, int identifier_length)
939 struct yaz_marc_subfield *s;
940 int turbo = mt->output_format == YAZ_MARC_TMARCXML;
943 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
944 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
948 //TODO consider if safe
// NOTE(review): strncpy copies at most 3 tag bytes and does not terminate
// the buffer by itself; the declaration and termination of field are not
// visible in this excerpt - confirm.
951 strncpy(field + 1, n->u.datafield.tag, 3);
953 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST field, 0);
955 if (n->u.datafield.indicator)
// One attribute per indicator byte, numbered from 1.
958 for (i = 0; n->u.datafield.indicator[i]; i++)
963 ind_val[0] = n->u.datafield.indicator[i];
965 sprintf(ind_str, "%s%d", indicator_name[turbo], i+1);
966 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
// Scratch buffer for building subfield element names; shadows the file
// level subfield_name table inside this function.
969 WRBUF subfield_name = wrbuf_alloc();
970 for (s = n->u.datafield.subfields; s; s = s->next)
972 xmlNode *ptr_subfield;
973 size_t using_code_len = get_subfield_len(mt, s->code_data,
// Convert the subfield data (everything after the code) into wr_cdata.
975 wrbuf_rewind(wr_cdata);
976 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, s->code_data + using_code_len);
977 marc_iconv_reset(mt, wr_cdata);
981 ptr_subfield = xmlNewTextChild(
983 BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
984 // Generate code attribute value and add
985 wrbuf_rewind(wr_cdata);
986 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len);
987 xmlNewProp(ptr_subfield, BAD_CAST "code",
988 BAD_CAST wrbuf_cstr(wr_cdata));
// Alternative naming: the element name is the letter s followed by the code
// when the code consists of plain characters. not_written records that the
// code could not be appended; presumably it guards the code attribute added
// below - the condition is not visible in this excerpt.
992 wrbuf_rewind(subfield_name);
993 wrbuf_puts(subfield_name, "s");
994 int not_written = element_name_append_attribute_value(mt, subfield_name, 0, s->code_data, using_code_len) != 0;
995 ptr_subfield = xmlNewTextChild(ptr, ns_record,
996 BAD_CAST wrbuf_cstr(subfield_name),
997 BAD_CAST wrbuf_cstr(wr_cdata));
1000 // Generate code attribute value and add
1001 wrbuf_rewind(wr_cdata);
1002 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len);
1003 xmlNewProp(ptr_subfield, BAD_CAST "code", BAD_CAST wrbuf_cstr(wr_cdata));
1007 wrbuf_destroy(subfield_name);
// Builds a libxml2 tree for the record in Turbo MARCXML form and stores the
// root element, named r, in *root_ptr. Data fields are delegated to
// add_marc_datafield_turbo_xml.
// The remaining parameters (ns, format and type are used below), the tests
// for format and type, missing leader handling, the conditions choosing
// between the two element naming schemes and the return value are on lines
// not visible in this excerpt.
1010 int yaz_marc_write_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
1015 struct yaz_marc_node *n;
1016 int identifier_length;
1017 const char *leader = 0;
1018 xmlNode *record_ptr;
1021 int turbo = mt->output_format == YAZ_MARC_TMARCXML;
1022 for (n = mt->nodes; n; n = n->next)
1023 if (n->which == YAZ_MARC_LEADER)
1025 leader = n->u.leader;
1031 if (!atoi_n_check(leader+11, 1, &identifier_length))
// Scratch buffer for converted text, released at the end.
1034 wr_cdata = wrbuf_alloc();
1036 record_ptr = xmlNewNode(0, BAD_CAST "r");
1037 *root_ptr = record_ptr;
1039 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
1040 xmlSetNs(record_ptr, ns_record);
1043 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
1045 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
1046 for (n = mt->nodes; n; n = n->next)
1048 struct yaz_marc_subfield *s;
1053 case YAZ_MARC_DATAFIELD:
1054 add_marc_datafield_turbo_xml(mt, n, record_ptr, ns_record, wr_cdata, identifier_length);
1056 case YAZ_MARC_CONTROLFIELD:
1057 wrbuf_rewind(wr_cdata);
1058 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
1059 marc_iconv_reset(mt, wr_cdata);
1063 ptr = xmlNewTextChild(record_ptr, ns_record,
1064 BAD_CAST "controlfield",
1065 BAD_CAST wrbuf_cstr(wr_cdata));
1066 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
1070 // TODO required iconv?
// NOTE(review): strncpy copies at most 3 tag bytes and does not terminate
// the buffer by itself; the declaration and termination of field are not
// visible in this excerpt - confirm.
1073 strncpy(field + 1, n->u.controlfield.tag, 3);
1075 ptr = xmlNewTextChild(record_ptr, ns_record,
1077 BAD_CAST wrbuf_cstr(wr_cdata));
1081 case YAZ_MARC_COMMENT:
1082 ptr = xmlNewComment(BAD_CAST n->u.comment);
1083 xmlAddChild(record_ptr, ptr);
1085 case YAZ_MARC_LEADER:
// The leader text is passed to libxml2 without character set conversion.
1087 char *field = "leader";
1090 xmlNewTextChild(record_ptr, ns_record, BAD_CAST field,
1091 BAD_CAST n->u.leader);
1096 wrbuf_destroy(wr_cdata);
// Builds a libxml2 tree for the record in MARCXML form and stores the root
// element, named record, in *root_ptr.
// Field data is converted through the iconv descriptor of the handle into a
// scratch WRBUF before it is handed to libxml2; tags, comments and the
// leader are passed without conversion.
// The remaining parameters (ns, format and type are used below), the tests
// for format and type, missing leader handling and the return value are on
// lines not visible in this excerpt.
1101 int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
1106 struct yaz_marc_node *n;
1107 int identifier_length;
1108 const char *leader = 0;
1109 xmlNode *record_ptr;
1113 for (n = mt->nodes; n; n = n->next)
1114 if (n->which == YAZ_MARC_LEADER)
1116 leader = n->u.leader;
1122 if (!atoi_n_check(leader+11, 1, &identifier_length))
// Scratch buffer for converted text, released at the end.
1125 wr_cdata = wrbuf_alloc();
1127 record_ptr = xmlNewNode(0, BAD_CAST "record");
1128 *root_ptr = record_ptr;
1130 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
1131 xmlSetNs(record_ptr, ns_record);
1134 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
1136 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
1137 for (n = mt->nodes; n; n = n->next)
1139 struct yaz_marc_subfield *s;
1144 case YAZ_MARC_DATAFIELD:
1145 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
1146 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
1147 if (n->u.datafield.indicator)
// One indN attribute per indicator byte, numbered from 1.
1150 for (i = 0; n->u.datafield.indicator[i]; i++)
1155 sprintf(ind_str, "ind%d", i+1);
1156 ind_val[0] = n->u.datafield.indicator[i];
1158 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
1161 for (s = n->u.datafield.subfields; s; s = s->next)
1163 xmlNode *ptr_subfield;
1164 size_t using_code_len = get_subfield_len(mt, s->code_data,
// First the subfield data becomes the element text ...
1166 wrbuf_rewind(wr_cdata);
1167 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
1168 s->code_data + using_code_len);
1169 marc_iconv_reset(mt, wr_cdata);
1170 ptr_subfield = xmlNewTextChild(
1172 BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
// ... then the scratch buffer is reused for the code attribute value.
1174 wrbuf_rewind(wr_cdata);
1175 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
1176 s->code_data, using_code_len);
1177 xmlNewProp(ptr_subfield, BAD_CAST "code",
1178 BAD_CAST wrbuf_cstr(wr_cdata));
1181 case YAZ_MARC_CONTROLFIELD:
1182 wrbuf_rewind(wr_cdata);
1183 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
1184 marc_iconv_reset(mt, wr_cdata);
1186 ptr = xmlNewTextChild(record_ptr, ns_record,
1187 BAD_CAST "controlfield",
1188 BAD_CAST wrbuf_cstr(wr_cdata));
1190 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
1192 case YAZ_MARC_COMMENT:
1193 ptr = xmlNewComment(BAD_CAST n->u.comment);
1194 xmlAddChild(record_ptr, ptr);
1196 case YAZ_MARC_LEADER:
1197 xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
1198 BAD_CAST n->u.leader);
1202 wrbuf_destroy(wr_cdata);
// Serializes the record in ISO2709 form to wr.
// A first pass over the nodes builds the directory in a temporary WRBUF,
// measuring each field by converting it into a scratch buffer. Then the 24
// byte header and the directory are written, and a second pass writes the
// field data, followed by the record separator.
// Handling of a missing leader or failed leader digit parse, the declaration
// of base_address and the return value are on lines not visible in this
// excerpt.
1211 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
1213 struct yaz_marc_node *n;
1214 int indicator_length;
1215 int identifier_length;
1216 int length_data_entry;
1217 int length_starting;
1218 int length_implementation;
1219 int data_offset = 0;
1220 const char *leader = 0;
1221 WRBUF wr_dir, wr_head, wr_data_tmp;
1224 for (n = mt->nodes; n; n = n->next)
1225 if (n->which == YAZ_MARC_LEADER)
1226 leader = n->u.leader;
// Field widths used below come from single digits of the leader.
1230 if (!atoi_n_check(leader+10, 1, &indicator_length))
1232 if (!atoi_n_check(leader+11, 1, &identifier_length))
1234 if (!atoi_n_check(leader+20, 1, &length_data_entry))
1236 if (!atoi_n_check(leader+21, 1, &length_starting))
1238 if (!atoi_n_check(leader+22, 1, &length_implementation))
1241 wr_data_tmp = wrbuf_alloc();
1242 wr_dir = wrbuf_alloc();
// Pass 1: directory entries (tag, data length, starting offset).
1243 for (n = mt->nodes; n; n = n->next)
1245 int data_length = 0;
1246 struct yaz_marc_subfield *s;
1250 case YAZ_MARC_DATAFIELD:
// At most 3 tag characters go into the directory.
1251 wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
1252 data_length += indicator_length;
1253 wrbuf_rewind(wr_data_tmp);
1254 for (s = n->u.datafield.subfields; s; s = s->next)
1256 /* write dummy IDFS + content */
1257 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1258 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
1259 marc_iconv_reset(mt, wr_data_tmp);
1261 /* write dummy FS (makes MARC-8 to become ASCII) */
1262 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1263 marc_iconv_reset(mt, wr_data_tmp);
1264 data_length += wrbuf_len(wr_data_tmp);
1266 case YAZ_MARC_CONTROLFIELD:
1267 wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
1269 wrbuf_rewind(wr_data_tmp);
1270 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
1271 n->u.controlfield.data);
1272 marc_iconv_reset(mt, wr_data_tmp);
1273 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
1274 marc_iconv_reset(mt, wr_data_tmp);
1275 data_length += wrbuf_len(wr_data_tmp);
1277 case YAZ_MARC_COMMENT:
1279 case YAZ_MARC_LEADER:
// Length and offset are zero padded to the widths given by the leader. Any
// condition restricting these writes to nodes that produced data is not
// visible in this excerpt.
1284 wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
1285 wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
1286 data_offset += data_length;
1289 /* mark end of directory */
1290 wrbuf_putc(wr_dir, ISO2709_FS);
1292 /* base address of data (comes after leader+directory) */
1293 base_address = 24 + wrbuf_len(wr_dir);
1295 wr_head = wrbuf_alloc();
1297 /* write record length */
// The extra 1 presumably accounts for the record separator written at the
// end - confirm.
1298 wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
1299 /* from "original" leader */
1300 wrbuf_write(wr_head, leader+5, 7);
1301 /* base address of data */
1302 wrbuf_printf(wr_head, "%05d", base_address);
1303 /* from "original" leader */
1304 wrbuf_write(wr_head, leader+17, 7);
// Exactly 24 header bytes are emitted, then the directory.
1306 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
1307 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
1308 wrbuf_destroy(wr_head);
1309 wrbuf_destroy(wr_dir);
1310 wrbuf_destroy(wr_data_tmp);
// Pass 2: field data, in the same node order as the directory.
1312 for (n = mt->nodes; n; n = n->next)
1314 struct yaz_marc_subfield *s;
1318 case YAZ_MARC_DATAFIELD:
1319 wrbuf_printf(wr, "%.*s", indicator_length,
1320 n->u.datafield.indicator);
1321 for (s = n->u.datafield.subfields; s; s = s->next)
1323 wrbuf_putc(wr, ISO2709_IDFS);
1324 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
1325 marc_iconv_reset(mt, wr);
1327 wrbuf_putc(wr, ISO2709_FS);
1329 case YAZ_MARC_CONTROLFIELD:
1330 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
1331 marc_iconv_reset(mt, wr);
1332 wrbuf_putc(wr, ISO2709_FS);
1334 case YAZ_MARC_COMMENT:
1336 case YAZ_MARC_LEADER:
1340 wrbuf_printf(wr, "%c", ISO2709_RS);
// Decodes one ISO2709 record from buf and writes it to wr in the output
// format of the handle.
// \param mt    handle
// \param buf   input buffer
// \param bsize size of the input buffer
// \param wr    output WRBUF
// \return the value returned by yaz_marc_read_iso2709 on success, -1 on
//         error; the conditions between the visible lines are not shown in
//         this excerpt
1345 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1347 int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1350 s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1352 return -1; /* error */
1353 return r; /* OK, return length > 0 */
// Decodes one record into the internal WRBUF of the handle and exposes the
// result through result and rsize.
// The returned string points into the internal WRBUF, which is rewound at
// the start of every call, so it is only valid until the next call.
// Any null checks on result and rsize and the return value are on lines not
// visible in this excerpt.
1356 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1357 const char **result, size_t *rsize)
1361 wrbuf_rewind(mt->m_wr);
1362 r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1364 *result = wrbuf_cstr(mt->m_wr);
1366 *rsize = wrbuf_len(mt->m_wr);
// Stores the input format in the handle. Any null check on mt is not visible
// in this excerpt.
1370 void yaz_marc_set_read_format(yaz_marc_t mt, int format)
1373 mt->input_format = format;
// Returns the input format stored in the handle. Any null check on mt is not
// visible in this excerpt.
1376 int yaz_marc_get_read_format(yaz_marc_t mt)
1379 return mt->input_format;
// Stores the output format (one of the YAZ_MARC_ format constants used by
// yaz_marc_write_mode) in the handle. Any null check on mt is not visible in
// this excerpt.
1384 void yaz_marc_set_write_format(yaz_marc_t mt, int format)
1387 mt->output_format = format;
// Returns the output format stored in the handle. Any null check on mt is
// not visible in this excerpt.
1391 int yaz_marc_get_write_format(yaz_marc_t mt)
1394 return mt->output_format;
1400 * Deprecated, use yaz_marc_set_write_format
// Thin wrapper kept for compatibility: passes xmlmode on as the output
// format.
1402 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
1404 yaz_marc_set_write_format(mt, xmlmode);
// Sets the debug level of the handle. The body is not visible in this
// excerpt; presumably it stores level in the handle - confirm.
1409 void yaz_marc_debug(yaz_marc_t mt, int level)
// Sets the character set conversion descriptor used by the writers. The body
// is not visible in this excerpt; presumably it stores cd in mt->iconv_cd -
// confirm.
1415 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
// Returns the character set conversion descriptor stored in the handle.
1420 yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt)
1422 return mt->iconv_cd;
// Overwrites part of the leader of the current record in place: copies the
// bytes of str, without its terminating NUL, to offset off of each leader
// node found in the list.
// NOTE(review): no check that off + strlen(str) stays within the leader is
// visible before the memcpy - confirm that callers pass values that fit.
// \param mt  handle
// \param off byte offset into the leader
// \param str replacement bytes
1425 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1427 struct yaz_marc_node *n;
1429 for (n = mt->nodes; n; n = n->next)
1430 if (n->which == YAZ_MARC_LEADER)
1432 leader = n->u.leader;
1433 memcpy(leader+off, str, strlen(str));
// Replaces the leader specification of the handle. The old specification is
// freed first. The new one is validated by applying it to a scratch 24 byte
// leader with marc_exec_leader and is then stored as an xstrdup copy.
// The test for a null leader_spec, the handling of a failed validation and
// the return value are on lines not visible in this excerpt.
1438 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1440 xfree(mt->leader_spec);
1441 mt->leader_spec = 0;
1444 char dummy_leader[24];
1445 if (marc_exec_leader(leader_spec, dummy_leader, 24))
1447 mt->leader_spec = xstrdup(leader_spec);
// Applies a leader specification to a leader buffer.
// Each entry is scanned as a decimal position, an equals sign and a value of
// up to 20 characters that ends at a comma. The position must lie within
// size. A value in single quotes is copied to leader + pos after a check
// that it fits; a value starting with a digit is handled by a branch whose
// body is not visible in this excerpt.
// The loop over entries, the handling of a null leader_spec and the return
// values are on lines not visible in this excerpt.
// \param leader_spec specification string
// \param leader      leader buffer to modify
// \param size        size of the leader buffer
1452 static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
1454 const char *cp = leader_spec;
1459 int no_read = 0, no = 0;
1461 no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read);
// Both conversions must succeed and at least three characters be consumed.
1462 if (no < 2 || no_read < 3)
1464 if (pos < 0 || (size_t) pos >= size)
// Look for the closing quote after the opening one at val[0].
1469 const char *vp = strchr(val+1, '\'');
1475 if (len + pos > size)
1477 memcpy(leader + pos, val+1, len);
1479 else if (*val >= '0' && *val <= '9')
// Maps a format name to the matching YAZ_MARC_ format constant.
// Recognized names: marc, marcxml, tmarcxml, marcxchange and line. The tests
// are independent if statements, so every name is compared on each call.
// The initial value of mode (the result for an unknown name) and the return
// statement are on lines not visible in this excerpt.
1495 int yaz_marc_decode_formatstr(const char *arg)
1498 if (!strcmp(arg, "marc"))
1499 mode = YAZ_MARC_ISO2709;
1500 if (!strcmp(arg, "marcxml"))
1501 mode = YAZ_MARC_MARCXML;
1502 if (!strcmp(arg, "tmarcxml"))
1503 mode = YAZ_MARC_TMARCXML;
1504 if (!strcmp(arg, "marcxchange"))
1505 mode = YAZ_MARC_XCHANGE;
1506 if (!strcmp(arg, "line"))
1507 mode = YAZ_MARC_LINE;
// Enables (non-zero) or disables (zero) the libxml2 based XML output path
// used by yaz_marc_write_marcxml_ns.
1511 void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
1513 mt->write_using_libxml2 = enable;
// Returns non-zero when the output format of the handle is Turbo MARCXML.
1516 int yaz_marc_is_turbo_format(yaz_marc_t mt)
1518 return mt->output_format == YAZ_MARC_TMARCXML;
1525 * c-file-style: "Stroustrup"
1526 * indent-tabs-mode: nil
1528 * vim: shiftwidth=4 tabstop=8 expandtab