1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2010 Index Data
3 * See the file LICENSE for details.
8 * \brief Implements MARC conversion utilities
24 #include <yaz/marcdisp.h>
25 #include <yaz/wrbuf.h>
26 #include <yaz/yaz-util.h>
27 #include <yaz/nmem_xml.h>
28 #include <yaz/snprintf.h>
31 #include <libxml/parser.h>
32 #include <libxml/tree.h>
35 enum yaz_collection_state {
41 /** \brief node types for yaz_marc_node */
42 enum YAZ_MARC_NODE_TYPE
45 YAZ_MARC_CONTROLFIELD,
50 /** \brief represents a data field */
51 struct yaz_marc_datafield {
54 struct yaz_marc_subfield *subfields;
57 /** \brief represents a control field */
58 struct yaz_marc_controlfield {
63 /** \brief a comment node */
64 struct yaz_marc_comment {
68 /** \brief MARC node */
69 struct yaz_marc_node {
70 enum YAZ_MARC_NODE_TYPE which;
72 struct yaz_marc_datafield datafield;
73 struct yaz_marc_controlfield controlfield;
77 struct yaz_marc_node *next;
80 /** \brief represents a subfield */
81 struct yaz_marc_subfield {
83 struct yaz_marc_subfield *next;
86 /** \brief the internals of a yaz_marc_t handle */
93 int write_using_libxml2;
94 enum yaz_collection_state enable_collection;
99 struct yaz_marc_node *nodes;
100 struct yaz_marc_node **nodes_pp;
101 struct yaz_marc_subfield **subfield_pp;
104 yaz_marc_t yaz_marc_create(void)
106 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
107 mt->output_format = YAZ_MARC_LINE;
109 mt->write_using_libxml2 = 0;
110 mt->enable_collection = no_collection;
111 mt->m_wr = wrbuf_alloc();
114 strcpy(mt->subfield_str, " $");
115 strcpy(mt->endline_str, "\n");
117 mt->nmem = nmem_create();
122 void yaz_marc_destroy(yaz_marc_t mt)
126 nmem_destroy(mt->nmem);
127 wrbuf_destroy(mt->m_wr);
128 xfree(mt->leader_spec);
132 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
137 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
139 wrbuf_iconv_reset(wr, mt->iconv_cd);
142 static int marc_exec_leader(const char *leader_spec, char *leader,
146 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
148 struct yaz_marc_node *n = (struct yaz_marc_node *)
149 nmem_malloc(mt->nmem, sizeof(*n));
152 mt->nodes_pp = &n->next;
157 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
158 const xmlNode *ptr_data)
160 struct yaz_marc_node *n = yaz_marc_add_node(mt);
161 n->which = YAZ_MARC_CONTROLFIELD;
162 n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
163 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
166 void yaz_marc_add_controlfield_turbo_xml(yaz_marc_t mt, char *tag,
167 const xmlNode *ptr_data)
169 struct yaz_marc_node *n = yaz_marc_add_node(mt);
170 n->which = YAZ_MARC_CONTROLFIELD;
171 n->u.controlfield.tag = tag;
172 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
178 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
180 struct yaz_marc_node *n = yaz_marc_add_node(mt);
181 n->which = YAZ_MARC_COMMENT;
182 n->u.comment = nmem_strdup(mt->nmem, comment);
185 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
191 yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
192 yaz_marc_add_comment(mt, buf);
196 int yaz_marc_get_debug(yaz_marc_t mt)
201 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
203 struct yaz_marc_node *n = yaz_marc_add_node(mt);
204 n->which = YAZ_MARC_LEADER;
205 n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
206 marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
209 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
210 const char *data, size_t data_len)
212 struct yaz_marc_node *n = yaz_marc_add_node(mt);
213 n->which = YAZ_MARC_CONTROLFIELD;
214 n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
215 n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
221 sprintf(msg, "controlfield:");
222 for (i = 0; i < 16 && i < data_len; i++)
223 sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
225 sprintf(msg + strlen(msg), " ..");
226 yaz_marc_add_comment(mt, msg);
230 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
231 const char *indicator, size_t indicator_len)
233 struct yaz_marc_node *n = yaz_marc_add_node(mt);
234 n->which = YAZ_MARC_DATAFIELD;
235 n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
236 n->u.datafield.indicator =
237 nmem_strdupn(mt->nmem, indicator, indicator_len);
238 n->u.datafield.subfields = 0;
240 /* make subfield_pp the current (last one) */
241 mt->subfield_pp = &n->u.datafield.subfields;
/** \brief appends a subfield code to a WRBUF for use in an XML element name
    \param mt handle; its iconv_cd is used when the code is written verbatim
    \param buffer WRBUF that receives the output
    \param code_data subfield code bytes (need not be NUL-terminated)
    \param code_len number of bytes in code_data

    The visible lines scan the code for bytes outside [0-9a-zA-Z] and show
    two output forms: the code written as-is through iconv, or "-" followed
    by the upper-case hex value of every byte (with a warning logged).
    NOTE(review): the statement selecting between the two forms and the
    return statement are on lines elided from this listing.
*/
244 char *element_name_encode(yaz_marc_t mt, WRBUF buffer, char *code_data, size_t code_len) {
245 // TODO Map special codes to something possible for XML ELEMENT names
249 for (index = 0; index < code_len; index++) {
250 if (!((code_data[index] >= '0' && code_data[index] <= '9') ||
251 (code_data[index] >= 'a' && code_data[index] <= 'z') ||
252 (code_data[index] >= 'A' && code_data[index] <= 'Z')))
256 wrbuf_iconv_write(buffer, mt->iconv_cd, code_data, code_len);
259 char temp[2*code_len + 1];
260 wrbuf_puts(buffer, "-");
262 for (index = 0; index < code_len; index++) {
263 sprintf(temp+2*index, "%02X", (unsigned char) code_data[index] & 0xFF);
265 temp[2*code_len] = 0; /* last valid index of temp; 2*code_len+1 would write past the array. Still needed when code_len is 0 and sprintf never runs */
266 wrbuf_puts(buffer, temp);
267 yaz_log(YLOG_WARN, "Using numeric value in element name: %s", temp);
272 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
273 const char *indicator, size_t indicator_len)
275 struct yaz_marc_node *n = yaz_marc_add_node(mt);
276 n->which = YAZ_MARC_DATAFIELD;
277 n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
278 n->u.datafield.indicator =
279 nmem_strdupn(mt->nmem, indicator, indicator_len);
280 n->u.datafield.subfields = 0;
282 /* make subfield_pp the current (last one) */
283 mt->subfield_pp = &n->u.datafield.subfields;
286 void yaz_marc_add_datafield_turbo_xml(yaz_marc_t mt, char *tag_value, char *indicators)
288 struct yaz_marc_node *n = yaz_marc_add_node(mt);
289 n->which = YAZ_MARC_DATAFIELD;
290 n->u.datafield.tag = tag_value;
291 n->u.datafield.indicator = indicators;
292 n->u.datafield.subfields = 0;
294 // make subfield_pp the current (last one)
295 mt->subfield_pp = &n->u.datafield.subfields;
298 void yaz_marc_datafield_set_indicators(struct yaz_marc_node *n, char *indicator)
300 n->u.datafield.indicator = indicator;
305 void yaz_marc_add_subfield(yaz_marc_t mt,
306 const char *code_data, size_t code_data_len)
313 sprintf(msg, "subfield:");
314 for (i = 0; i < 16 && i < code_data_len; i++)
315 sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
316 if (i < code_data_len)
317 sprintf(msg + strlen(msg), " ..");
318 yaz_marc_add_comment(mt, msg);
323 struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
324 nmem_malloc(mt->nmem, sizeof(*n));
325 n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
327 /* mark subfield_pp to point to this one, so we append here next */
328 *mt->subfield_pp = n;
329 mt->subfield_pp = &n->next;
333 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
334 int *indicator_length,
335 int *identifier_length,
337 int *length_data_entry,
338 int *length_starting,
339 int *length_implementation)
343 memcpy(leader, leader_c, 24);
345 if (!atoi_n_check(leader+10, 1, indicator_length))
348 "Indicator length at offset 10 should hold a digit."
351 *indicator_length = 2;
353 if (!atoi_n_check(leader+11, 1, identifier_length))
356 "Identifier length at offset 11 should hold a digit."
359 *identifier_length = 2;
361 if (!atoi_n_check(leader+12, 5, base_address))
364 "Base address at offsets 12..16 should hold a number."
368 if (!atoi_n_check(leader+20, 1, length_data_entry))
371 "Length data entry at offset 20 should hold a digit."
373 *length_data_entry = 4;
376 if (!atoi_n_check(leader+21, 1, length_starting))
379 "Length starting at offset 21 should hold a digit."
381 *length_starting = 5;
384 if (!atoi_n_check(leader+22, 1, length_implementation))
387 "Length implementation at offset 22 should hold a digit."
389 *length_implementation = 0;
395 yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
396 yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
397 yaz_marc_cprintf(mt, "Base address %5d", *base_address);
398 yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
399 yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
400 yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
402 yaz_marc_add_leader(mt, leader, 24);
405 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
407 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
408 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
411 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
413 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
414 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
417 /* try to guess how many bytes the identifier really is! */
418 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
423 for (i = 1; i<5; i++)
426 size_t outbytesleft = sizeof(outbuf);
428 const char *inp = buf;
430 size_t inbytesleft = i;
431 size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
432 &outp, &outbytesleft);
433 if (r != (size_t) (-1))
434 return i; /* got a complete sequence */
436 return 1; /* giving up */
438 return 1; /* we don't know */
441 void yaz_marc_reset(yaz_marc_t mt)
443 nmem_reset(mt->nmem);
445 mt->nodes_pp = &mt->nodes;
449 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
451 struct yaz_marc_node *n;
452 int identifier_length;
453 const char *leader = 0;
455 for (n = mt->nodes; n; n = n->next)
456 if (n->which == YAZ_MARC_LEADER)
458 leader = n->u.leader;
464 if (!atoi_n_check(leader+11, 1, &identifier_length))
467 for (n = mt->nodes; n; n = n->next)
471 case YAZ_MARC_COMMENT:
472 wrbuf_iconv_write(wr, mt->iconv_cd,
473 n->u.comment, strlen(n->u.comment));
474 wrbuf_puts(wr, "\n");
483 static size_t get_subfield_len(yaz_marc_t mt, const char *data,
484 int identifier_length)
486 /* if identifier length is 2 (most MARCs) or less (probably an error),
487 the code is a single character .. However we've
488 seen multibyte codes, so see how big it really is */
489 if (identifier_length > 2)
490 return identifier_length - 1;
492 return cdata_one_character(mt, data);
495 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
497 struct yaz_marc_node *n;
498 int identifier_length;
499 const char *leader = 0;
501 for (n = mt->nodes; n; n = n->next)
502 if (n->which == YAZ_MARC_LEADER)
504 leader = n->u.leader;
510 if (!atoi_n_check(leader+11, 1, &identifier_length))
513 for (n = mt->nodes; n; n = n->next)
515 struct yaz_marc_subfield *s;
518 case YAZ_MARC_DATAFIELD:
519 wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
520 n->u.datafield.indicator);
521 for (s = n->u.datafield.subfields; s; s = s->next)
523 size_t using_code_len = get_subfield_len(mt, s->code_data,
526 wrbuf_puts (wr, mt->subfield_str);
527 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
529 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
530 wrbuf_iconv_puts(wr, mt->iconv_cd,
531 s->code_data + using_code_len);
532 marc_iconv_reset(mt, wr);
534 wrbuf_puts (wr, mt->endline_str);
536 case YAZ_MARC_CONTROLFIELD:
537 wrbuf_printf(wr, "%s", n->u.controlfield.tag);
538 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
539 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
540 marc_iconv_reset(mt, wr);
541 wrbuf_puts (wr, mt->endline_str);
543 case YAZ_MARC_COMMENT:
545 wrbuf_iconv_write(wr, mt->iconv_cd,
546 n->u.comment, strlen(n->u.comment));
547 marc_iconv_reset(mt, wr);
548 wrbuf_puts(wr, ")\n");
550 case YAZ_MARC_LEADER:
551 wrbuf_printf(wr, "%s\n", n->u.leader);
554 wrbuf_puts(wr, "\n");
558 int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
560 if (mt->enable_collection == collection_second)
562 switch(mt->output_format)
564 case YAZ_MARC_MARCXML:
565 case YAZ_MARC_TMARCXML:
566 wrbuf_printf(wr, "</collection>\n");
568 case YAZ_MARC_XCHANGE:
569 wrbuf_printf(wr, "</collection>\n");
576 void yaz_marc_enable_collection(yaz_marc_t mt)
578 mt->enable_collection = collection_first;
581 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
583 switch(mt->output_format)
586 return yaz_marc_write_line(mt, wr);
587 case YAZ_MARC_MARCXML:
588 case YAZ_MARC_TMARCXML:
589 return yaz_marc_write_marcxml(mt, wr);
590 case YAZ_MARC_XCHANGE:
591 return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
592 case YAZ_MARC_ISO2709:
593 return yaz_marc_write_iso2709(mt, wr);
595 return yaz_marc_write_check(mt, wr);
600 const char *collection_name[2] = { "collection", "collection"};
601 const char *record_name[2] = { "record", "r"};
602 const char *leader_name[2] = { "leader", "l"};
603 const char *controlfield_name[2]= { "controlfield", "c"};
604 const char *datafield_name[2] = { "datafield", "d"};
605 const char *indicator_name[2] = { "ind", "i"};
606 const char *subfield_name[2] = { "subfield", "s"};
609 /** \brief common MARC XML/Xchange writer
611 \param wr WRBUF output
612 \param ns XMLNS for the elements
613 \param format record format (e.g. "MARC21")
614 \param type record type (e.g. "Bibliographic")
// yaz_marc_write_marcxml_ns1: text-based (WRBUF) writer shared by the
// MARCXML, Turbo MARCXML and MarcXchange outputs.
// Visible steps: locate the leader node and parse the identifier length
// from leader offset 11; when collection mode is enabled, open the
// collection element on the first record only (state moves from
// collection_first to collection_second); write the record element with
// optional format/type attributes; then emit every node (datafield with
// indicators and subfields, controlfield, comment, leader) and close the
// record element.
// `turbo` is 1 for YAZ_MARC_TMARCXML and indexes the *_name tables, picking
// the short element names ("r", "d", "s", ...).
// NOTE(review): the remaining parameters, the error returns and the
// if/else lines choosing between attribute-style and turbo-style output
// are on lines elided from this listing.
616 static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr,
621 struct yaz_marc_node *n;
622 int identifier_length;
623 const char *leader = 0;
625 int turbo = yaz_marc_get_write_format(mt) == YAZ_MARC_TMARCXML;
627 for (n = mt->nodes; n; n = n->next)
628 if (n->which == YAZ_MARC_LEADER)
630 leader = n->u.leader;
636 if (!atoi_n_check(leader+11, 1, &identifier_length))
639 if (mt->enable_collection != no_collection)
641 if (mt->enable_collection == collection_first) {
642 wrbuf_printf(wr, "<%s xmlns=\"%s\">\n", collection_name[turbo], ns);
643 mt->enable_collection = collection_second;
645 wrbuf_printf(wr, "<%s", record_name[turbo]);
649 wrbuf_printf(wr, "<%s xmlns=\"%s\"", record_name[turbo], ns);
652 wrbuf_printf(wr, " format=\"%.80s\"", format);
654 wrbuf_printf(wr, " type=\"%.80s\"", type);
655 wrbuf_printf(wr, ">\n");
656 for (n = mt->nodes; n; n = n->next)
658 struct yaz_marc_subfield *s;
662 case YAZ_MARC_DATAFIELD:
664 wrbuf_printf(wr, " <%s", datafield_name[turbo]);
666 wrbuf_printf(wr, " tag=\"");
667 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
668 strlen(n->u.datafield.tag));
670 wrbuf_printf(wr, "\"");
671 if (n->u.datafield.indicator)
674 for (i = 0; n->u.datafield.indicator[i]; i++)
676 wrbuf_printf(wr, " %s%d=\"", indicator_name[turbo], i+1);
677 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
678 n->u.datafield.indicator+i, 1);
679 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
682 wrbuf_printf(wr, ">\n");
683 for (s = n->u.datafield.subfields; s; s = s->next)
685 size_t using_code_len = get_subfield_len(mt, s->code_data,
687 wrbuf_printf(wr, " <%s", subfield_name[turbo]);
689 wrbuf_printf(wr, " code=\"");
690 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
691 s->code_data, using_code_len);
692 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
694 element_name_encode(mt, wr, s->code_data, using_code_len);
697 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
698 s->code_data + using_code_len,
699 strlen(s->code_data + using_code_len));
700 marc_iconv_reset(mt, wr);
701 wrbuf_printf(wr, "</%s", subfield_name[turbo]);
703 element_name_encode(mt, wr, s->code_data, using_code_len);
704 wrbuf_puts(wr, ">\n");
706 wrbuf_printf(wr, " </%s", datafield_name[turbo]);
709 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
710 strlen(n->u.datafield.tag));
// The element name was already written above; the format has no conversion, so no extra argument is passed.
711 wrbuf_printf(wr, ">\n");
713 case YAZ_MARC_CONTROLFIELD:
714 wrbuf_printf(wr, " <%s", controlfield_name[turbo]);
716 wrbuf_printf(wr, " tag=\"");
717 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
718 strlen(n->u.controlfield.tag));
719 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
722 //TODO convert special
723 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
724 strlen(n->u.controlfield.tag));
725 wrbuf_iconv_puts(wr, mt->iconv_cd, ">");
727 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
728 n->u.controlfield.data,
729 strlen(n->u.controlfield.data));
730 marc_iconv_reset(mt, wr);
731 wrbuf_printf(wr, "</%s", controlfield_name[turbo]);
732 //TODO convert special
734 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
735 strlen(n->u.controlfield.tag));
736 wrbuf_puts(wr, ">\n");
738 case YAZ_MARC_COMMENT:
739 wrbuf_printf(wr, "<!-- ");
740 wrbuf_puts(wr, n->u.comment);
741 wrbuf_printf(wr, " -->\n");
743 case YAZ_MARC_LEADER:
744 wrbuf_printf(wr, " <%s>", leader_name[turbo]);
745 wrbuf_iconv_write_cdata(wr,
746 0 , /* no charset conversion for leader */
747 n->u.leader, strlen(n->u.leader));
748 wrbuf_printf(wr, "</%s>\n", leader_name[turbo]);
751 wrbuf_printf(wr, "</%s>\n", record_name[turbo]);
755 static int yaz_marc_write_marcxml_ns2(yaz_marc_t mt, WRBUF wr,
760 struct yaz_marc_node *n;
761 int identifier_length;
762 const char *leader = 0;
764 for (n = mt->nodes; n; n = n->next)
765 if (n->which == YAZ_MARC_LEADER)
767 leader = n->u.leader;
773 if (!atoi_n_check(leader+11, 1, &identifier_length))
776 if (mt->enable_collection != no_collection)
778 if (mt->enable_collection == collection_first)
779 wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
780 mt->enable_collection = collection_second;
781 wrbuf_printf(wr, "<record");
785 wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
788 wrbuf_printf(wr, " format=\"%.80s\"", format);
790 wrbuf_printf(wr, " type=\"%.80s\"", type);
791 wrbuf_printf(wr, ">\n");
792 for (n = mt->nodes; n; n = n->next)
794 struct yaz_marc_subfield *s;
798 case YAZ_MARC_DATAFIELD:
799 wrbuf_printf(wr, " <datafield tag=\"");
800 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
801 strlen(n->u.datafield.tag));
802 wrbuf_printf(wr, "\"");
803 if (n->u.datafield.indicator)
806 for (i = 0; n->u.datafield.indicator[i]; i++)
808 wrbuf_printf(wr, " ind%d=\"", i+1);
809 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
810 n->u.datafield.indicator+i, 1);
811 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
814 wrbuf_printf(wr, ">\n");
815 for (s = n->u.datafield.subfields; s; s = s->next)
817 size_t using_code_len = get_subfield_len(mt, s->code_data,
819 wrbuf_iconv_puts(wr, mt->iconv_cd, " <subfield code=\"");
820 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
821 s->code_data, using_code_len);
822 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
823 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
824 s->code_data + using_code_len,
825 strlen(s->code_data + using_code_len));
826 marc_iconv_reset(mt, wr);
827 wrbuf_iconv_puts(wr, mt->iconv_cd, "</subfield>");
828 wrbuf_puts(wr, "\n");
830 wrbuf_printf(wr, " </datafield>\n");
832 case YAZ_MARC_CONTROLFIELD:
833 wrbuf_printf(wr, " <controlfield tag=\"");
834 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
835 strlen(n->u.controlfield.tag));
836 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
837 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
838 n->u.controlfield.data,
839 strlen(n->u.controlfield.data));
841 marc_iconv_reset(mt, wr);
842 wrbuf_iconv_puts(wr, mt->iconv_cd, "</controlfield>");
843 wrbuf_puts(wr, "\n");
845 case YAZ_MARC_COMMENT:
846 wrbuf_printf(wr, "<!-- ");
847 wrbuf_puts(wr, n->u.comment);
848 wrbuf_printf(wr, " -->\n");
850 case YAZ_MARC_LEADER:
851 wrbuf_printf(wr, " <leader>");
852 wrbuf_iconv_write_cdata(wr,
853 0 /* no charset conversion for leader */,
854 n->u.leader, strlen(n->u.leader));
855 wrbuf_printf(wr, "</leader>\n");
858 wrbuf_puts(wr, "</record>\n");
863 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
868 if (mt->write_using_libxml2)
874 if (yaz_marc_get_write_format(mt) == YAZ_MARC_MARCXML)
875 ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
876 else // Check for Turbo XML
877 ret = yaz_marc_write_turbo_xml(mt, &root_ptr, ns, format, type);
881 xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
884 xmlDocSetRootElement(doc, root_ptr);
885 xmlDocDumpMemory(doc, &buf_out, &len_out);
887 wrbuf_write(wr, (const char *) buf_out, len_out);
898 return yaz_marc_write_marcxml_ns1(mt, wr, ns, format, type);
901 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
903 /* set leader 09 to 'a' for UNICODE */
904 /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
905 if (!mt->leader_spec)
906 yaz_marc_modify_leader(mt, 9, "a");
907 char *name_space = "http://www.loc.gov/MARC21/slim";
908 if (mt->output_format == YAZ_MARC_TMARCXML)
909 name_space = "http://www.indexdata.com/MARC21/turboxml";
910 return yaz_marc_write_marcxml_ns(mt, wr, name_space,
914 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
918 return yaz_marc_write_marcxml_ns(mt, wr,
919 "info:lc/xmlns/marcxchange-v1",
925 void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n, xmlNode *record_ptr, xmlNsPtr ns_record, WRBUF wr_cdata, int identifier_length)
928 struct yaz_marc_subfield *s;
929 int turbo = mt->output_format == YAZ_MARC_TMARCXML;
931 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
932 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
935 //TODO consider if safe
938 strncpy(field + 1, n->u.datafield.tag, 3);
940 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST field, 0);
942 if (n->u.datafield.indicator)
945 for (i = 0; n->u.datafield.indicator[i]; i++)
950 ind_val[0] = n->u.datafield.indicator[i];
952 sprintf(ind_str, "%s%d", indicator_name[turbo], i+1);
953 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
956 WRBUF subfield_name = wrbuf_alloc();
957 for (s = n->u.datafield.subfields; s; s = s->next)
959 xmlNode *ptr_subfield;
960 size_t using_code_len = get_subfield_len(mt, s->code_data,
962 wrbuf_rewind(wr_cdata);
963 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, s->code_data + using_code_len);
964 marc_iconv_reset(mt, wr_cdata);
967 ptr_subfield = xmlNewTextChild(
969 BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
970 wrbuf_rewind(wr_cdata);
971 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len);
972 xmlNewProp(ptr_subfield, BAD_CAST "code",
973 BAD_CAST wrbuf_cstr(wr_cdata));
975 else { // Turbo format
976 wrbuf_rewind(subfield_name);
977 wrbuf_puts(subfield_name, "s");
978 element_name_encode(mt, subfield_name, s->code_data, using_code_len);
979 ptr_subfield = xmlNewTextChild(ptr, ns_record,
980 BAD_CAST wrbuf_cstr(subfield_name),
981 BAD_CAST wrbuf_cstr(wr_cdata));
984 wrbuf_destroy(subfield_name);
987 int yaz_marc_write_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
992 struct yaz_marc_node *n;
993 int identifier_length;
994 const char *leader = 0;
998 int turbo = mt->output_format == YAZ_MARC_TMARCXML;
999 for (n = mt->nodes; n; n = n->next)
1000 if (n->which == YAZ_MARC_LEADER)
1002 leader = n->u.leader;
1008 if (!atoi_n_check(leader+11, 1, &identifier_length))
1011 wr_cdata = wrbuf_alloc();
1013 record_ptr = xmlNewNode(0, BAD_CAST "r");
1014 *root_ptr = record_ptr;
1016 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
1017 xmlSetNs(record_ptr, ns_record);
1020 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
1022 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
1023 for (n = mt->nodes; n; n = n->next)
1025 struct yaz_marc_subfield *s;
1030 case YAZ_MARC_DATAFIELD:
1031 add_marc_datafield_turbo_xml(mt, n, record_ptr, ns_record, wr_cdata, identifier_length);
1033 case YAZ_MARC_CONTROLFIELD:
1034 wrbuf_rewind(wr_cdata);
1035 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
1036 marc_iconv_reset(mt, wr_cdata);
1039 ptr = xmlNewTextChild(record_ptr, ns_record,
1040 BAD_CAST "controlfield",
1041 BAD_CAST wrbuf_cstr(wr_cdata));
1042 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
1045 // TODO required iconv?
1048 strncpy(field + 1, n->u.controlfield.tag, 3);
1050 ptr = xmlNewTextChild(record_ptr, ns_record,
1052 BAD_CAST wrbuf_cstr(wr_cdata));
1056 case YAZ_MARC_COMMENT:
1057 ptr = xmlNewComment(BAD_CAST n->u.comment);
1058 xmlAddChild(record_ptr, ptr);
1060 case YAZ_MARC_LEADER:
1062 char *field = "leader";
1065 xmlNewTextChild(record_ptr, ns_record, BAD_CAST field,
1066 BAD_CAST n->u.leader);
1071 wrbuf_destroy(wr_cdata);
1076 int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
1081 struct yaz_marc_node *n;
1082 int identifier_length;
1083 const char *leader = 0;
1084 xmlNode *record_ptr;
1088 for (n = mt->nodes; n; n = n->next)
1089 if (n->which == YAZ_MARC_LEADER)
1091 leader = n->u.leader;
1097 if (!atoi_n_check(leader+11, 1, &identifier_length))
1100 wr_cdata = wrbuf_alloc();
1102 record_ptr = xmlNewNode(0, BAD_CAST "record");
1103 *root_ptr = record_ptr;
1105 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
1106 xmlSetNs(record_ptr, ns_record);
1109 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
1111 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
1112 for (n = mt->nodes; n; n = n->next)
1114 struct yaz_marc_subfield *s;
1119 case YAZ_MARC_DATAFIELD:
1120 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
1121 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
1122 if (n->u.datafield.indicator)
1125 for (i = 0; n->u.datafield.indicator[i]; i++)
1130 sprintf(ind_str, "ind%d", i+1);
1131 ind_val[0] = n->u.datafield.indicator[i];
1133 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
1136 for (s = n->u.datafield.subfields; s; s = s->next)
1138 xmlNode *ptr_subfield;
1139 size_t using_code_len = get_subfield_len(mt, s->code_data,
1141 wrbuf_rewind(wr_cdata);
1142 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
1143 s->code_data + using_code_len);
1144 marc_iconv_reset(mt, wr_cdata);
1145 ptr_subfield = xmlNewTextChild(
1147 BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
1149 wrbuf_rewind(wr_cdata);
1150 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
1151 s->code_data, using_code_len);
1152 xmlNewProp(ptr_subfield, BAD_CAST "code",
1153 BAD_CAST wrbuf_cstr(wr_cdata));
1156 case YAZ_MARC_CONTROLFIELD:
1157 wrbuf_rewind(wr_cdata);
1158 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
1159 marc_iconv_reset(mt, wr_cdata);
1161 ptr = xmlNewTextChild(record_ptr, ns_record,
1162 BAD_CAST "controlfield",
1163 BAD_CAST wrbuf_cstr(wr_cdata));
1165 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
1167 case YAZ_MARC_COMMENT:
1168 ptr = xmlNewComment(BAD_CAST n->u.comment);
1169 xmlAddChild(record_ptr, ptr);
1171 case YAZ_MARC_LEADER:
1172 xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
1173 BAD_CAST n->u.leader);
1177 wrbuf_destroy(wr_cdata);
1186 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
1188 struct yaz_marc_node *n;
1189 int indicator_length;
1190 int identifier_length;
1191 int length_data_entry;
1192 int length_starting;
1193 int length_implementation;
1194 int data_offset = 0;
1195 const char *leader = 0;
1196 WRBUF wr_dir, wr_head, wr_data_tmp;
1199 for (n = mt->nodes; n; n = n->next)
1200 if (n->which == YAZ_MARC_LEADER)
1201 leader = n->u.leader;
1205 if (!atoi_n_check(leader+10, 1, &indicator_length))
1207 if (!atoi_n_check(leader+11, 1, &identifier_length))
1209 if (!atoi_n_check(leader+20, 1, &length_data_entry))
1211 if (!atoi_n_check(leader+21, 1, &length_starting))
1213 if (!atoi_n_check(leader+22, 1, &length_implementation))
1216 wr_data_tmp = wrbuf_alloc();
1217 wr_dir = wrbuf_alloc();
1218 for (n = mt->nodes; n; n = n->next)
1220 int data_length = 0;
1221 struct yaz_marc_subfield *s;
1225 case YAZ_MARC_DATAFIELD:
1226 wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
1227 data_length += indicator_length;
1228 wrbuf_rewind(wr_data_tmp);
1229 for (s = n->u.datafield.subfields; s; s = s->next)
1231 /* write dummy IDFS + content */
1232 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1233 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
1234 marc_iconv_reset(mt, wr_data_tmp);
1236 /* write dummy FS (makes MARC-8 to become ASCII) */
1237 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1238 marc_iconv_reset(mt, wr_data_tmp);
1239 data_length += wrbuf_len(wr_data_tmp);
1241 case YAZ_MARC_CONTROLFIELD:
1242 wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
1244 wrbuf_rewind(wr_data_tmp);
1245 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
1246 n->u.controlfield.data);
1247 marc_iconv_reset(mt, wr_data_tmp);
1248 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
1249 marc_iconv_reset(mt, wr_data_tmp);
1250 data_length += wrbuf_len(wr_data_tmp);
1252 case YAZ_MARC_COMMENT:
1254 case YAZ_MARC_LEADER:
1259 wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
1260 wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
1261 data_offset += data_length;
1264 /* mark end of directory */
1265 wrbuf_putc(wr_dir, ISO2709_FS);
1267 /* base address of data (comes after leader+directory) */
1268 base_address = 24 + wrbuf_len(wr_dir);
1270 wr_head = wrbuf_alloc();
1272 /* write record length */
1273 wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
1274 /* from "original" leader */
1275 wrbuf_write(wr_head, leader+5, 7);
1276 /* base address of data */
1277 wrbuf_printf(wr_head, "%05d", base_address);
1278 /* from "original" leader */
1279 wrbuf_write(wr_head, leader+17, 7);
1281 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
1282 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
1283 wrbuf_destroy(wr_head);
1284 wrbuf_destroy(wr_dir);
1285 wrbuf_destroy(wr_data_tmp);
1287 for (n = mt->nodes; n; n = n->next)
1289 struct yaz_marc_subfield *s;
1293 case YAZ_MARC_DATAFIELD:
1294 wrbuf_printf(wr, "%.*s", indicator_length,
1295 n->u.datafield.indicator);
1296 for (s = n->u.datafield.subfields; s; s = s->next)
1298 wrbuf_putc(wr, ISO2709_IDFS);
1299 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
1300 marc_iconv_reset(mt, wr);
1302 wrbuf_putc(wr, ISO2709_FS);
1304 case YAZ_MARC_CONTROLFIELD:
1305 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
1306 marc_iconv_reset(mt, wr);
1307 wrbuf_putc(wr, ISO2709_FS);
1309 case YAZ_MARC_COMMENT:
1311 case YAZ_MARC_LEADER:
1315 wrbuf_printf(wr, "%c", ISO2709_RS);
1320 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1322 int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1325 s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1327 return -1; /* error */
1328 return r; /* OK, return length > 0 */
1331 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1332 const char **result, size_t *rsize)
1336 wrbuf_rewind(mt->m_wr);
1337 r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1339 *result = wrbuf_cstr(mt->m_wr);
1341 *rsize = wrbuf_len(mt->m_wr);
1345 void yaz_marc_set_read_format(yaz_marc_t mt, int format)
1348 mt->input_format = format;
1351 int yaz_marc_get_read_format(yaz_marc_t mt)
1354 return mt->input_format;
1359 void yaz_marc_set_write_format(yaz_marc_t mt, int format)
1362 mt->output_format = format;
1366 int yaz_marc_get_write_format(yaz_marc_t mt)
1369 return mt->output_format;
1375 * Deprecated, use yaz_marc_set_write_format
1377 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
1379 yaz_marc_set_write_format(mt, xmlmode);
1384 void yaz_marc_debug(yaz_marc_t mt, int level)
1390 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
1395 yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt)
1397 return mt->iconv_cd;
1400 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1402 struct yaz_marc_node *n;
1404 for (n = mt->nodes; n; n = n->next)
1405 if (n->which == YAZ_MARC_LEADER)
1407 leader = n->u.leader;
1408 memcpy(leader+off, str, strlen(str));
1413 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1415 xfree(mt->leader_spec);
1416 mt->leader_spec = 0;
1419 char dummy_leader[24];
1420 if (marc_exec_leader(leader_spec, dummy_leader, 24))
1422 mt->leader_spec = xstrdup(leader_spec);
1427 static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
1429 const char *cp = leader_spec;
1434 int no_read = 0, no = 0;
1436 no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read);
1437 if (no < 2 || no_read < 3)
1439 if (pos < 0 || (size_t) pos >= size)
1444 const char *vp = strchr(val+1, '\'');
1450 if (len + pos > size)
1452 memcpy(leader + pos, val+1, len);
1454 else if (*val >= '0' && *val <= '9')
1470 int yaz_marc_decode_formatstr(const char *arg)
1473 if (!strcmp(arg, "marc"))
1474 mode = YAZ_MARC_ISO2709;
1475 if (!strcmp(arg, "marcxml"))
1476 mode = YAZ_MARC_MARCXML;
1477 if (!strcmp(arg, "tmarcxml"))
1478 mode = YAZ_MARC_TMARCXML;
1479 if (!strcmp(arg, "marcxchange"))
1480 mode = YAZ_MARC_XCHANGE;
1481 if (!strcmp(arg, "line"))
1482 mode = YAZ_MARC_LINE;
1486 void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
1488 mt->write_using_libxml2 = enable;
1491 int yaz_marc_is_turbo_format(yaz_marc_t mt)
1493 return mt->output_format == YAZ_MARC_TMARCXML;
1500 * c-file-style: "Stroustrup"
1501 * indent-tabs-mode: nil
1503 * vim: shiftwidth=4 tabstop=8 expandtab