1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2013 Index Data
3 * See the file LICENSE for details.
8 * \brief Implements MARC conversion utilities
23 #include <yaz/marcdisp.h>
24 #include <yaz/wrbuf.h>
25 #include <yaz/yaz-util.h>
26 #include <yaz/nmem_xml.h>
27 #include <yaz/snprintf.h>
30 #include <libxml/parser.h>
31 #include <libxml/tree.h>
34 enum yaz_collection_state {
40 /** \brief node types for yaz_marc_node */
41 enum YAZ_MARC_NODE_TYPE
44 YAZ_MARC_CONTROLFIELD,
49 /** \brief represents a data field */
50 struct yaz_marc_datafield {
53 struct yaz_marc_subfield *subfields;
56 /** \brief represents a control field */
57 struct yaz_marc_controlfield {
62 /** \brief a comment node */
63 struct yaz_marc_comment {
67 /** \brief MARC node */
68 struct yaz_marc_node {
69 enum YAZ_MARC_NODE_TYPE which;
71 struct yaz_marc_datafield datafield;
72 struct yaz_marc_controlfield controlfield;
76 struct yaz_marc_node *next;
79 /** \brief represents a subfield */
80 struct yaz_marc_subfield {
82 struct yaz_marc_subfield *next;
85 /** \brief the internals of a yaz_marc_t handle */
91 int write_using_libxml2;
92 enum yaz_collection_state enable_collection;
97 struct yaz_marc_node *nodes;
98 struct yaz_marc_node **nodes_pp;
99 struct yaz_marc_subfield **subfield_pp;
/* Create a MARC handle. The handle is allocated with xmalloc and given
   its defaults: line output format, libxml2 writing disabled, no
   collection wrapper, " $" as subfield separator and "\n" as end-of-line
   string. It also gets its own WRBUF (m_wr) and NMEM (nmem), which
   yaz_marc_destroy releases. */
102 yaz_marc_t yaz_marc_create(void)
104 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
105 mt->output_format = YAZ_MARC_LINE;
107 mt->write_using_libxml2 = 0;
108 mt->enable_collection = no_collection;
109 mt->m_wr = wrbuf_alloc();
112 strcpy(mt->subfield_str, " $");
113 strcpy(mt->endline_str, "\n");
115 mt->nmem = nmem_create();
/* Release what a MARC handle holds: its NMEM, its WRBUF and the leader
   specification string. */
120 void yaz_marc_destroy(yaz_marc_t mt)
124 nmem_destroy(mt->nmem);
125 wrbuf_destroy(mt->m_wr);
126 xfree(mt->leader_spec);
130 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
135 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
137 wrbuf_iconv_reset(wr, mt->iconv_cd);
140 static int marc_exec_leader(const char *leader_spec, char *leader,
143 static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
/* Allocate a new node from the handle's NMEM and point nodes_pp at the
   node's next field (presumably so the following node is linked in after
   this one; the linking statement itself is not visible here). */
149 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
151 struct yaz_marc_node *n = (struct yaz_marc_node *)
152 nmem_malloc(mt->nmem, sizeof(*n));
155 mt->nodes_pp = &n->next;
160 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
161 const xmlNode *ptr_data)
163 struct yaz_marc_node *n = yaz_marc_add_node(mt);
164 n->which = YAZ_MARC_CONTROLFIELD;
165 n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
166 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
169 void yaz_marc_add_controlfield_xml2(yaz_marc_t mt, char *tag,
170 const xmlNode *ptr_data)
172 struct yaz_marc_node *n = yaz_marc_add_node(mt);
173 n->which = YAZ_MARC_CONTROLFIELD;
174 n->u.controlfield.tag = tag;
175 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
181 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
183 struct yaz_marc_node *n = yaz_marc_add_node(mt);
184 n->which = YAZ_MARC_COMMENT;
185 n->u.comment = nmem_strdup(mt->nmem, comment);
/* printf-style helper: formats the message into buf with yaz_vsnprintf
   and records the result on the handle as a comment node. */
188 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
/* the size handed to yaz_vsnprintf is one less than sizeof(buf) */
194 yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
195 yaz_marc_add_comment(mt, buf);
199 int yaz_marc_get_debug(yaz_marc_t mt)
204 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
206 struct yaz_marc_node *n = yaz_marc_add_node(mt);
207 n->which = YAZ_MARC_LEADER;
208 n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
209 marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
/* Append a control field node with copies of tag and of the first
   data_len bytes of data, both allocated in the handle's NMEM. The
   visible tail of the function builds a diagnostic comment containing a
   hex dump of the data. */
212 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
213 const char *data, size_t data_len)
215 struct yaz_marc_node *n = yaz_marc_add_node(mt);
216 n->which = YAZ_MARC_CONTROLFIELD;
217 n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
218 n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
/* hex dump of at most the first 16 data bytes; "& 0xff" keeps each byte
   to two hex digits even if char is signed */
224 sprintf(msg, "controlfield:");
225 for (i = 0; i < 16 && i < data_len; i++)
226 sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
228 sprintf(msg + strlen(msg), " ..");
229 yaz_marc_add_comment(mt, msg);
233 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
234 const char *indicator, size_t indicator_len)
236 struct yaz_marc_node *n = yaz_marc_add_node(mt);
237 n->which = YAZ_MARC_DATAFIELD;
238 n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
239 n->u.datafield.indicator =
240 nmem_strdupn(mt->nmem, indicator, indicator_len);
241 n->u.datafield.subfields = 0;
243 /* make subfield_pp the current (last one) */
244 mt->subfield_pp = &n->u.datafield.subfields;
247 /** \brief adds an attribute value to the element name if it is plain chars
249 If not, and if the attribute name is not null, it will append an
250 attribute with the value. If the attribute name is null it will
251 return a non-zero value meaning it couldn't handle the value.
Plain chars here are the ASCII letters and digits tested in the loop.
The value is written to the buffer with wrbuf_iconv_write_cdata using
the handle's iconv descriptor.
253 static int element_name_append_attribute_value(
254 yaz_marc_t mt, WRBUF buffer,
255 const char *attribute_name, char *code_data, size_t code_len)
257 /* TODO Map special codes to something possible for XML ELEMENT names */
/* test every byte of the code against [0-9a-zA-Z] */
262 for (index = 0; index < code_len; index++)
264 if (!((code_data[index] >= '0' && code_data[index] <= '9') ||
265 (code_data[index] >= 'a' && code_data[index] <= 'z') ||
266 (code_data[index] >= 'A' && code_data[index] <= 'Z')))
269 /* Add as attribute */
270 if (encode && attribute_name)
271 wrbuf_printf(buffer, " %s=\"", attribute_name);
/* the value is written when no encoding is needed, or when it goes into
   an attribute */
273 if (!encode || attribute_name)
274 wrbuf_iconv_write_cdata(buffer, mt->iconv_cd, code_data, code_len);
/* close the attribute opened above */
278 if (encode && attribute_name)
279 wrbuf_printf(buffer, "\""); /* return error if we couldn't handle it.*/
284 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
285 const char *indicator, size_t indicator_len)
287 struct yaz_marc_node *n = yaz_marc_add_node(mt);
288 n->which = YAZ_MARC_DATAFIELD;
289 n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
290 n->u.datafield.indicator =
291 nmem_strdupn(mt->nmem, indicator, indicator_len);
292 n->u.datafield.subfields = 0;
294 /* make subfield_pp the current (last one) */
295 mt->subfield_pp = &n->u.datafield.subfields;
298 void yaz_marc_add_datafield_xml2(yaz_marc_t mt, char *tag_value, char *indicators)
300 struct yaz_marc_node *n = yaz_marc_add_node(mt);
301 n->which = YAZ_MARC_DATAFIELD;
302 n->u.datafield.tag = tag_value;
303 n->u.datafield.indicator = indicators;
304 n->u.datafield.subfields = 0;
306 /* make subfield_pp the current (last one) */
307 mt->subfield_pp = &n->u.datafield.subfields;
310 void yaz_marc_datafield_set_indicators(struct yaz_marc_node *n, char *indicator)
312 n->u.datafield.indicator = indicator;
/* Append a subfield to the current data field. code_data holds the
   subfield code immediately followed by the subfield content; the first
   code_data_len bytes are copied into the handle's NMEM. The first part
   of the function builds a diagnostic comment with a hex dump. */
317 void yaz_marc_add_subfield(yaz_marc_t mt,
318 const char *code_data, size_t code_data_len)
/* hex dump of at most the first 16 bytes, " .." marks truncation */
325 sprintf(msg, "subfield:");
326 for (i = 0; i < 16 && i < code_data_len; i++)
327 sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
328 if (i < code_data_len)
329 sprintf(msg + strlen(msg), " ..");
330 yaz_marc_add_comment(mt, msg);
335 struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
336 nmem_malloc(mt->nmem, sizeof(*n));
337 n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
339 /* mark subfield_pp to point to this one, so we append here next */
340 *mt->subfield_pp = n;
341 mt->subfield_pp = &n->next;
/* Sanitize one leader position: if the byte at offset is below ' ' or
   above 127 it is replaced by ch_default and a message is logged. */
345 static void check_ascii(yaz_marc_t mt, char *leader, int offset,
/* NOTE(review): leader is plain char. Where char is signed the "> 127"
   test cannot be true, but bytes 0x80..0xFF are then negative and are
   caught by the "< ' '" test, so both signedness choices behave alike. */
348 if (leader[offset] < ' ' || leader[offset] > 127)
351 "Leader character at offset %d is non-ASCII. "
352 "Setting value to '%c'", offset, ch_default);
353 leader[offset] = ch_default;
/* Validate a 24-byte MARC leader, report the lengths it encodes through
   the output parameters, and store the (possibly corrected) leader on
   the handle.

   The leader is first copied into a local buffer. Character positions
   5-9, 17-19 and 23 are sanitized with check_ascii. The numeric
   positions are parsed with atoi_n_check; when a value is missing or out
   of range a comment is logged and the default named in the message is
   used. Finally the leader is added with yaz_marc_add_leader. */
357 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
358 int *indicator_length,
359 int *identifier_length,
361 int *length_data_entry,
362 int *length_starting,
363 int *length_implementation)
/* work on a private copy; leader_c itself is never modified */
367 memcpy(leader, leader_c, 24);
369 check_ascii(mt, leader, 5, 'a');
370 check_ascii(mt, leader, 6, 'a');
371 check_ascii(mt, leader, 7, 'a');
372 check_ascii(mt, leader, 8, '#');
373 check_ascii(mt, leader, 9, '#');
/* offset 10: indicator length, one digit, zero is rejected */
374 if (!atoi_n_check(leader+10, 1, indicator_length) || *indicator_length == 0)
376 yaz_marc_cprintf(mt, "Indicator length at offset 10 should"
377 " hold a number 1-9. Assuming 2");
379 *indicator_length = 2;
/* offset 11: identifier length, one digit, zero is rejected */
381 if (!atoi_n_check(leader+11, 1, identifier_length) || *identifier_length == 0)
383 yaz_marc_cprintf(mt, "Identifier length at offset 11 should "
384 " hold a number 1-9. Assuming 2");
386 *identifier_length = 2;
/* offsets 12..16: base address of data, five digits */
388 if (!atoi_n_check(leader+12, 5, base_address))
390 yaz_marc_cprintf(mt, "Base address at offsets 12..16 should"
391 " hold a number. Assuming 0");
394 check_ascii(mt, leader, 17, '#');
395 check_ascii(mt, leader, 18, '#');
396 check_ascii(mt, leader, 19, '#');
/* offset 20: length of the "length of field" directory part, at least 3 */
397 if (!atoi_n_check(leader+20, 1, length_data_entry) ||
398 *length_data_entry < 3)
400 yaz_marc_cprintf(mt, "Length data entry at offset 20 should"
401 " hold a number 3-9. Assuming 4");
402 *length_data_entry = 4;
/* offset 21: length of the "starting position" directory part, at least 4 */
405 if (!atoi_n_check(leader+21, 1, length_starting) || *length_starting < 4)
407 yaz_marc_cprintf(mt, "Length starting at offset 21 should"
408 " hold a number 4-9. Assuming 5");
409 *length_starting = 5;
/* offset 22: length of the implementation-defined directory part */
412 if (!atoi_n_check(leader+22, 1, length_implementation))
414 yaz_marc_cprintf(mt, "Length implementation at offset 22 should"
415 " hold a number. Assuming 0");
416 *length_implementation = 0;
419 check_ascii(mt, leader, 23, '0');
/* log the values that were settled on */
423 yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
424 yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
425 yaz_marc_cprintf(mt, "Base address %5d", *base_address);
426 yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
427 yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
428 yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
430 yaz_marc_add_leader(mt, leader, 24);
433 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
435 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
436 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
439 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
441 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
442 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
445 /* try to guess how many bytes the identifier really is! */
/* Feeds the first 1, 2, 3 and 4 bytes of buf to yaz_iconv in turn and
   returns the first length for which the conversion does not fail.
   Falls back to 1 in the remaining visible return paths. */
446 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
451 for (i = 1; i<5; i++)
454 size_t outbytesleft = sizeof(outbuf);
456 const char *inp = buf;
458 size_t inbytesleft = i;
459 size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
460 &outp, &outbytesleft);
/* second call passes no input; it runs after every attempt, successful
   or not */
461 yaz_iconv(mt->iconv_cd, 0, 0, &outp, &outbytesleft);
462 if (r != (size_t) (-1))
463 return i; /* got a complete sequence */
465 return 1; /* giving up */
467 return 1; /* we don't know */
/* Discard the current record: the handle's NMEM is reset (the nodes are
   allocated from it) and nodes_pp is pointed back at the head of the
   node list. */
470 void yaz_marc_reset(yaz_marc_t mt)
472 nmem_reset(mt->nmem);
474 mt->nodes_pp = &mt->nodes;
/* Output mode that only emits the comment nodes of the record, one per
   line. As in the other writers, the leader node is located first and
   its identifier length (offset 11) is parsed. */
478 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
480 struct yaz_marc_node *n;
481 int identifier_length;
482 const char *leader = 0;
/* find the leader node */
484 for (n = mt->nodes; n; n = n->next)
485 if (n->which == YAZ_MARC_LEADER)
487 leader = n->u.leader;
493 if (!atoi_n_check(leader+11, 1, &identifier_length))
496 for (n = mt->nodes; n; n = n->next)
500 case YAZ_MARC_COMMENT:
501 wrbuf_iconv_write(wr, mt->iconv_cd,
502 n->u.comment, strlen(n->u.comment));
503 wrbuf_puts(wr, "\n");
512 static size_t get_subfield_len(yaz_marc_t mt, const char *data,
513 int identifier_length)
515 /* if identifier length is 2 (most MARCs) or less (probably an error),
516 the code is a single character .. However we've
517 seen multibyte codes, so see how big it really is */
518 if (identifier_length > 2)
519 return identifier_length - 1;
521 return cdata_one_character(mt, data);
/* Write the record in line format to wr.

   The leader node is located first and the identifier length is parsed
   from leader offset 11. Then every node is written: data fields as
   "tag indicators" followed by each subfield (separator string, code, a
   space, content); control fields as "tag data"; comments followed by
   ")" and a newline; the leader on a line of its own. Field lines end
   with the handle's endline_str, and a final newline ends the record. */
524 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
526 struct yaz_marc_node *n;
527 int identifier_length;
528 const char *leader = 0;
/* find the leader node */
530 for (n = mt->nodes; n; n = n->next)
531 if (n->which == YAZ_MARC_LEADER)
533 leader = n->u.leader;
539 if (!atoi_n_check(leader+11, 1, &identifier_length))
542 for (n = mt->nodes; n; n = n->next)
544 struct yaz_marc_subfield *s;
547 case YAZ_MARC_DATAFIELD:
/* tag and indicators are written without character set conversion */
548 wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
549 n->u.datafield.indicator);
550 for (s = n->u.datafield.subfields; s; s = s->next)
/* code_data is the subfield code followed directly by the content */
552 size_t using_code_len = get_subfield_len(mt, s->code_data,
555 wrbuf_puts (wr, mt->subfield_str);
556 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
558 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
559 wrbuf_iconv_puts(wr, mt->iconv_cd,
560 s->code_data + using_code_len);
561 marc_iconv_reset(mt, wr);
563 wrbuf_puts (wr, mt->endline_str);
565 case YAZ_MARC_CONTROLFIELD:
566 wrbuf_printf(wr, "%s", n->u.controlfield.tag);
567 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
568 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
569 marc_iconv_reset(mt, wr);
570 wrbuf_puts (wr, mt->endline_str);
572 case YAZ_MARC_COMMENT:
574 wrbuf_iconv_write(wr, mt->iconv_cd,
575 n->u.comment, strlen(n->u.comment));
576 marc_iconv_reset(mt, wr);
577 wrbuf_puts(wr, ")\n");
579 case YAZ_MARC_LEADER:
580 wrbuf_printf(wr, "%s\n", n->u.leader);
/* blank line after the record */
583 wrbuf_puts(wr, "\n");
/* Close the collection wrapper. The closing </collection> tag is written
   only when enable_collection is collection_second, the state the XML
   writer sets after emitting the opening tag, and only for the MARCXML,
   TurboMARC and MarcXchange output formats. */
587 int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
589 if (mt->enable_collection == collection_second)
591 switch(mt->output_format)
593 case YAZ_MARC_MARCXML:
594 case YAZ_MARC_TURBOMARC:
595 wrbuf_printf(wr, "</collection>\n");
597 case YAZ_MARC_XCHANGE:
598 wrbuf_printf(wr, "</collection>\n");
605 void yaz_marc_enable_collection(yaz_marc_t mt)
607 mt->enable_collection = collection_first;
/* Write the current record to wr in the handle's output format by
   dispatching to the matching writer and returning its result. */
610 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
612 switch(mt->output_format)
615 return yaz_marc_write_line(mt, wr);
616 case YAZ_MARC_MARCXML:
617 return yaz_marc_write_marcxml(mt, wr);
618 case YAZ_MARC_TURBOMARC:
619 return yaz_marc_write_turbomarc(mt, wr);
620 case YAZ_MARC_XCHANGE:
621 return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
622 case YAZ_MARC_ISO2709:
623 return yaz_marc_write_iso2709(mt, wr);
625 return yaz_marc_write_check(mt, wr);
/* Element and attribute names used by the WRBUF XML writer. Each table
   is indexed by the writer's turbo flag: entry 0 is the long name,
   entry 1 the short TurboMARC name. */
static const char *record_name[2]       = { "record",       "r" };
static const char *leader_name[2]       = { "leader",       "l" };
static const char *controlfield_name[2] = { "controlfield", "c" };
static const char *datafield_name[2]    = { "datafield",    "d" };
static const char *indicator_name[2]    = { "ind",          "i" };
static const char *subfield_name[2]     = { "subfield",     "s" };
637 /** \brief common MARC XML/Xchange/turbomarc writer
639 \param wr WRBUF output
640 \param ns XMLNS for the elements
641 \param format record format (e.g. "MARC21")
642 \param type record type (e.g. "Bibliographic")
643 \param turbo =1 for turbomarc
The mt argument is the MARC handle whose node list is written.
The leader node is located first and the identifier length is parsed
from leader offset 11. When a collection wrapper is enabled, the opening
collection element (carrying the namespace) is written once and the
state moves to collection_second; otherwise the namespace is put on the
record element. Element names come from the name tables indexed by
turbo. Field content goes through wrbuf_iconv_write_cdata with the
handle's iconv descriptor, which is reset after each value.
647 static int yaz_marc_write_marcxml_wrbuf(yaz_marc_t mt, WRBUF wr,
653 struct yaz_marc_node *n;
654 int identifier_length;
655 const char *leader = 0;
657 for (n = mt->nodes; n; n = n->next)
658 if (n->which == YAZ_MARC_LEADER)
660 leader = n->u.leader;
666 if (!atoi_n_check(leader+11, 1, &identifier_length))
669 if (mt->enable_collection != no_collection)
671 if (mt->enable_collection == collection_first)
673 wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
674 mt->enable_collection = collection_second;
676 wrbuf_printf(wr, "<%s", record_name[turbo]);
680 wrbuf_printf(wr, "<%s xmlns=\"%s\"", record_name[turbo], ns);
683 wrbuf_printf(wr, " format=\"%.80s\"", format);
685 wrbuf_printf(wr, " type=\"%.80s\"", type);
686 wrbuf_printf(wr, ">\n");
687 for (n = mt->nodes; n; n = n->next)
689 struct yaz_marc_subfield *s;
693 case YAZ_MARC_DATAFIELD:
695 wrbuf_printf(wr, " <%s", datafield_name[turbo]);
697 wrbuf_printf(wr, " tag=\"");
698 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
699 strlen(n->u.datafield.tag));
701 wrbuf_printf(wr, "\"");
702 if (n->u.datafield.indicator)
705 for (i = 0; n->u.datafield.indicator[i]; i++)
707 wrbuf_printf(wr, " %s%d=\"", indicator_name[turbo], i+1);
708 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
709 n->u.datafield.indicator+i, 1);
710 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
713 wrbuf_printf(wr, ">\n");
714 for (s = n->u.datafield.subfields; s; s = s->next)
716 size_t using_code_len = get_subfield_len(mt, s->code_data,
718 wrbuf_printf(wr, " <%s", subfield_name[turbo]);
721 wrbuf_printf(wr, " code=\"");
722 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
723 s->code_data, using_code_len);
724 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
728 element_name_append_attribute_value(mt, wr, "code", s->code_data, using_code_len);
731 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
732 s->code_data + using_code_len,
733 strlen(s->code_data + using_code_len));
734 marc_iconv_reset(mt, wr);
735 wrbuf_printf(wr, "</%s", subfield_name[turbo]);
737 element_name_append_attribute_value(mt, wr, 0, s->code_data, using_code_len);
738 wrbuf_puts(wr, ">\n");
740 wrbuf_printf(wr, " </%s", datafield_name[turbo]);
743 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
744 strlen(n->u.datafield.tag));
745 wrbuf_printf(wr, ">\n");
747 case YAZ_MARC_CONTROLFIELD:
748 wrbuf_printf(wr, " <%s", controlfield_name[turbo]);
751 wrbuf_printf(wr, " tag=\"");
752 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
753 strlen(n->u.controlfield.tag));
754 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
758 /* TODO convert special */
759 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
760 strlen(n->u.controlfield.tag));
761 wrbuf_iconv_puts(wr, mt->iconv_cd, ">");
763 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
764 n->u.controlfield.data,
765 strlen(n->u.controlfield.data));
766 marc_iconv_reset(mt, wr);
767 wrbuf_printf(wr, "</%s", controlfield_name[turbo]);
768 /* TODO convert special */
770 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
771 strlen(n->u.controlfield.tag));
772 wrbuf_puts(wr, ">\n");
774 case YAZ_MARC_COMMENT:
/* NOTE(review): the comment text is written with wrbuf_puts, i.e. with
   neither character set conversion nor XML escaping; confirm that it
   can never contain "--" */
775 wrbuf_printf(wr, "<!-- ");
776 wrbuf_puts(wr, n->u.comment);
777 wrbuf_printf(wr, " -->\n");
779 case YAZ_MARC_LEADER:
780 wrbuf_printf(wr, " <%s>", leader_name[turbo]);
781 wrbuf_iconv_write_cdata(wr,
782 0 , /* no charset conversion for leader */
783 n->u.leader, strlen(n->u.leader));
784 wrbuf_printf(wr, "</%s>\n", leader_name[turbo]);
/* close the record element */
787 wrbuf_printf(wr, "</%s>\n", record_name[turbo]);
/* Write the record as XML in the given namespace, choosing the backend:
   when write_using_libxml2 is set, a libxml2 tree is built (regular or
   turbo variant), wrapped in a document and dumped into wr; otherwise
   the WRBUF-based writer is used directly. */
791 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
797 if (mt->write_using_libxml2)
/* build the tree; root_ptr receives the record element */
804 ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
806 ret = yaz_marc_write_xml_turbo_xml(mt, &root_ptr, ns, format, type);
/* serialize: new document, record as root, dump to memory, copy to wr */
810 xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
813 xmlDocSetRootElement(doc, root_ptr);
814 xmlDocDumpMemory(doc, &buf_out, &len_out);
816 wrbuf_write(wr, (const char *) buf_out, len_out);
827 return yaz_marc_write_marcxml_wrbuf(mt, wr, ns, format, type, turbo);
/* Write the record as MARCXML (namespace http://www.loc.gov/MARC21/slim).
   Unless a leader specification is set on the handle, leader position 9
   is forced to 'a' before writing. */
830 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
832 /* set leader 09 to 'a' for UNICODE */
833 /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
834 if (!mt->leader_spec)
835 yaz_marc_modify_leader(mt, 9, "a");
836 return yaz_marc_write_marcxml_ns(mt, wr,
837 "http://www.loc.gov/MARC21/slim",
841 int yaz_marc_write_turbomarc(yaz_marc_t mt, WRBUF wr)
843 /* set leader 09 to 'a' for UNICODE */
844 /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
845 if (!mt->leader_spec)
846 yaz_marc_modify_leader(mt, 9, "a");
847 return yaz_marc_write_marcxml_ns(mt, wr,
848 "http://www.indexdata.com/turbomarc", 0, 0, 1);
/* Write the record as MarcXchange XML, i.e. through the common
   namespace-aware writer with the info:lc/xmlns/marcxchange-v1
   namespace. */
851 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
855 return yaz_marc_write_marcxml_ns(mt, wr,
856 "info:lc/xmlns/marcxchange-v1",
/* Add one data field to a TurboMARC libxml2 tree.

   A child element of record_ptr is created whose name is built in field
   from the tag (at most 3 tag bytes are copied in after the first
   character). Each indicator becomes an attribute named from
   indicator_name[1] plus its 1-based position. Each subfield becomes a
   text child whose element name starts with "s" and is extended by
   element_name_append_attribute_value; wr_cdata is scratch space for the
   character-set-converted text. */
862 void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n,
864 xmlNsPtr ns_record, WRBUF wr_cdata,
865 int identifier_length)
868 struct yaz_marc_subfield *s;
/* local buffer for subfield element names; destroyed at the end */
869 WRBUF subfield_name = wrbuf_alloc();
871 /* TODO consider if safe */
874 strncpy(field + 1, n->u.datafield.tag, 3);
876 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST field, 0);
878 if (n->u.datafield.indicator)
881 for (i = 0; n->u.datafield.indicator[i]; i++)
886 ind_val[0] = n->u.datafield.indicator[i];
888 sprintf(ind_str, "%s%d", indicator_name[1], i+1);
889 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
892 for (s = n->u.datafield.subfields; s; s = s->next)
895 xmlNode *ptr_subfield;
896 size_t using_code_len = get_subfield_len(mt, s->code_data,
/* converted subfield content (everything after the code) */
898 wrbuf_rewind(wr_cdata);
899 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, s->code_data + using_code_len);
900 marc_iconv_reset(mt, wr_cdata);
/* element name: "s" plus the code when the code is plain */
902 wrbuf_rewind(subfield_name);
903 wrbuf_puts(subfield_name, "s");
904 not_written = element_name_append_attribute_value(mt, subfield_name, 0, s->code_data, using_code_len) != 0;
905 ptr_subfield = xmlNewTextChild(ptr, ns_record,
906 BAD_CAST wrbuf_cstr(subfield_name),
907 BAD_CAST wrbuf_cstr(wr_cdata));
910 /* Generate code attribute value and add */
911 wrbuf_rewind(wr_cdata);
912 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len);
913 xmlNewProp(ptr_subfield, BAD_CAST "code", BAD_CAST wrbuf_cstr(wr_cdata));
916 wrbuf_destroy(subfield_name);
/* Build a TurboMARC libxml2 tree for the current record and return its
   root through root_ptr.

   The leader node is located and its identifier length (offset 11)
   parsed. The root element "r" is created in namespace ns, with format
   and type attributes. Then each node is converted: data fields through
   add_marc_datafield_turbo_xml, control fields as text children whose
   name is built in field from the tag, comments as XML comments and the
   leader as an "l" element. */
919 static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
924 struct yaz_marc_node *n;
925 int identifier_length;
926 const char *leader = 0;
931 for (n = mt->nodes; n; n = n->next)
932 if (n->which == YAZ_MARC_LEADER)
934 leader = n->u.leader;
940 if (!atoi_n_check(leader+11, 1, &identifier_length))
/* scratch buffer for converted text; destroyed at the end */
943 wr_cdata = wrbuf_alloc();
945 record_ptr = xmlNewNode(0, BAD_CAST "r");
946 *root_ptr = record_ptr;
948 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
949 xmlSetNs(record_ptr, ns_record);
952 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
954 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
955 for (n = mt->nodes; n; n = n->next)
965 case YAZ_MARC_DATAFIELD:
966 add_marc_datafield_turbo_xml(mt, n, record_ptr, ns_record, wr_cdata, identifier_length);
968 case YAZ_MARC_CONTROLFIELD:
969 wrbuf_rewind(wr_cdata);
970 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
971 marc_iconv_reset(mt, wr_cdata);
/* at most 3 tag bytes are copied in after the first character */
973 strncpy(field + 1, n->u.controlfield.tag, 3);
974 ptr = xmlNewTextChild(record_ptr, ns_record,
976 BAD_CAST wrbuf_cstr(wr_cdata));
978 case YAZ_MARC_COMMENT:
979 ptr = xmlNewComment(BAD_CAST n->u.comment);
980 xmlAddChild(record_ptr, ptr);
982 case YAZ_MARC_LEADER:
983 xmlNewTextChild(record_ptr, ns_record, BAD_CAST "l",
984 BAD_CAST n->u.leader);
988 wrbuf_destroy(wr_cdata);
/* Build a MARCXML-style libxml2 tree for the current record and return
   its root through root_ptr.

   The leader node is located and its identifier length (offset 11)
   parsed. The root element "record" is created in namespace ns, with
   format and type attributes. Data fields become "datafield" elements
   with a tag attribute, indN attributes and "subfield" children carrying
   a code attribute; control fields become "controlfield" elements with a
   tag attribute; comments become XML comments; the leader becomes a
   "leader" element. Field content is converted with the handle's iconv
   descriptor via the scratch buffer wr_cdata. */
993 int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
998 struct yaz_marc_node *n;
999 int identifier_length;
1000 const char *leader = 0;
1001 xmlNode *record_ptr;
1005 for (n = mt->nodes; n; n = n->next)
1006 if (n->which == YAZ_MARC_LEADER)
1008 leader = n->u.leader;
1014 if (!atoi_n_check(leader+11, 1, &identifier_length))
1017 wr_cdata = wrbuf_alloc();
1019 record_ptr = xmlNewNode(0, BAD_CAST "record");
1020 *root_ptr = record_ptr;
1022 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
1023 xmlSetNs(record_ptr, ns_record);
1026 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
1028 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
1029 for (n = mt->nodes; n; n = n->next)
1031 struct yaz_marc_subfield *s;
1036 case YAZ_MARC_DATAFIELD:
/* the tag is passed to libxml2 as stored, without iconv conversion */
1037 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
1038 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
1039 if (n->u.datafield.indicator)
/* one attribute per indicator character: ind1, ind2, ... */
1042 for (i = 0; n->u.datafield.indicator[i]; i++)
1047 sprintf(ind_str, "ind%d", i+1);
1048 ind_val[0] = n->u.datafield.indicator[i];
1050 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
1053 for (s = n->u.datafield.subfields; s; s = s->next)
1055 xmlNode *ptr_subfield;
1056 size_t using_code_len = get_subfield_len(mt, s->code_data,
/* element text: converted content after the subfield code */
1058 wrbuf_rewind(wr_cdata);
1059 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
1060 s->code_data + using_code_len);
1061 marc_iconv_reset(mt, wr_cdata);
1062 ptr_subfield = xmlNewTextChild(
1064 BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
/* code attribute: the converted subfield code itself */
1066 wrbuf_rewind(wr_cdata);
1067 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
1068 s->code_data, using_code_len);
1069 xmlNewProp(ptr_subfield, BAD_CAST "code",
1070 BAD_CAST wrbuf_cstr(wr_cdata));
1073 case YAZ_MARC_CONTROLFIELD:
1074 wrbuf_rewind(wr_cdata);
1075 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
1076 marc_iconv_reset(mt, wr_cdata);
1078 ptr = xmlNewTextChild(record_ptr, ns_record,
1079 BAD_CAST "controlfield",
1080 BAD_CAST wrbuf_cstr(wr_cdata));
1082 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
1084 case YAZ_MARC_COMMENT:
1085 ptr = xmlNewComment(BAD_CAST n->u.comment);
1086 xmlAddChild(record_ptr, ptr);
1088 case YAZ_MARC_LEADER:
1089 xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
1090 BAD_CAST n->u.leader);
1094 wrbuf_destroy(wr_cdata);
/* Write the current record to wr in ISO 2709 format.

   The function works in two passes over the node list. The first pass
   builds the directory in wr_dir: for each field it converts the content
   into the scratch buffer wr_data_tmp only to learn its converted length,
   then appends tag, field length and starting offset. After that the
   24-byte leader is rebuilt in wr_head with the computed record length
   and base address (the other leader bytes are taken from the original
   leader), and leader plus directory are written. The second pass writes
   the field data itself, followed by the record separator. */
1100 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
1102 struct yaz_marc_node *n;
1103 int indicator_length;
1104 int identifier_length;
1105 int length_data_entry;
1106 int length_starting;
1107 int length_implementation;
1108 int data_offset = 0;
1109 const char *leader = 0;
1110 WRBUF wr_dir, wr_head, wr_data_tmp;
/* find the leader node */
1113 for (n = mt->nodes; n; n = n->next)
1114 if (n->which == YAZ_MARC_LEADER)
1115 leader = n->u.leader;
/* the directory layout is taken from the leader's single-digit fields */
1119 if (!atoi_n_check(leader+10, 1, &indicator_length))
1121 if (!atoi_n_check(leader+11, 1, &identifier_length))
1123 if (!atoi_n_check(leader+20, 1, &length_data_entry))
1125 if (!atoi_n_check(leader+21, 1, &length_starting))
1127 if (!atoi_n_check(leader+22, 1, &length_implementation))
/* pass 1: build the directory */
1130 wr_data_tmp = wrbuf_alloc();
1131 wr_dir = wrbuf_alloc();
1132 for (n = mt->nodes; n; n = n->next)
1134 int data_length = 0;
1135 struct yaz_marc_subfield *s;
1139 case YAZ_MARC_DATAFIELD:
/* "%.3s" writes at most three characters of the tag */
1140 wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
1141 data_length += indicator_length;
1142 wrbuf_rewind(wr_data_tmp);
1143 for (s = n->u.datafield.subfields; s; s = s->next)
1145 /* write dummy IDFS + content */
1146 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1147 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
1148 marc_iconv_reset(mt, wr_data_tmp);
1150 /* write dummy FS (makes MARC-8 to become ASCII) */
1151 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1152 marc_iconv_reset(mt, wr_data_tmp);
1153 data_length += wrbuf_len(wr_data_tmp);
1155 case YAZ_MARC_CONTROLFIELD:
1156 wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
1158 wrbuf_rewind(wr_data_tmp);
1159 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
1160 n->u.controlfield.data);
1161 marc_iconv_reset(mt, wr_data_tmp);
1162 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
1163 marc_iconv_reset(mt, wr_data_tmp);
1164 data_length += wrbuf_len(wr_data_tmp);
1166 case YAZ_MARC_COMMENT:
1168 case YAZ_MARC_LEADER:
/* directory entry: zero-padded field length and starting offset */
1173 wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
1174 wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
1175 data_offset += data_length;
1178 /* mark end of directory */
1179 wrbuf_putc(wr_dir, ISO2709_FS);
1181 /* base address of data (comes after leader+directory) */
1182 base_address = 24 + wrbuf_len(wr_dir);
1184 wr_head = wrbuf_alloc();
1186 /* write record length */
1187 wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
1188 /* from "original" leader */
1189 wrbuf_write(wr_head, leader+5, 7);
1190 /* base address of data */
1191 wrbuf_printf(wr_head, "%05d", base_address);
1192 /* from "original" leader */
1193 wrbuf_write(wr_head, leader+17, 7);
/* emit leader (exactly 24 bytes) and directory; temporaries are done */
1195 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
1196 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
1197 wrbuf_destroy(wr_head);
1198 wrbuf_destroy(wr_dir);
1199 wrbuf_destroy(wr_data_tmp);
/* pass 2: write the field data */
1201 for (n = mt->nodes; n; n = n->next)
1203 struct yaz_marc_subfield *s;
1207 case YAZ_MARC_DATAFIELD:
1208 wrbuf_write(wr, n->u.datafield.indicator, indicator_length);
1209 for (s = n->u.datafield.subfields; s; s = s->next)
1211 wrbuf_putc(wr, ISO2709_IDFS);
1212 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
1213 marc_iconv_reset(mt, wr);
1215 wrbuf_putc(wr, ISO2709_FS);
1217 case YAZ_MARC_CONTROLFIELD:
1218 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
1219 marc_iconv_reset(mt, wr);
1220 wrbuf_putc(wr, ISO2709_FS);
1222 case YAZ_MARC_COMMENT:
1224 case YAZ_MARC_LEADER:
/* record separator ends the record */
1228 wrbuf_printf(wr, "%c", ISO2709_RS);
/* Decode an ISO 2709 record from buf (bsize bytes) and write it to wr in
   the handle's output format. Per the inline comments, returns the
   record length (> 0) on success and -1 on error. */
1233 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1235 int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1238 s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1240 return -1; /* error */
1241 return r; /* OK, return length > 0 */
/* Like yaz_marc_decode_wrbuf, but the output goes to the handle's own
   WRBUF (rewound first). The converted record is handed back through
   result and its length through rsize; both point into that WRBUF, so
   they are only valid until the handle's buffer is reused. */
1244 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1245 const char **result, size_t *rsize)
1249 wrbuf_rewind(mt->m_wr);
1250 r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1252 *result = wrbuf_cstr(mt->m_wr);
1254 *rsize = wrbuf_len(mt->m_wr);
1258 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
1260 mt->output_format = xmlmode;
1263 void yaz_marc_debug(yaz_marc_t mt, int level)
1269 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
1274 yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt)
1276 return mt->iconv_cd;
/* Overwrite part of the stored leader: the bytes of str (without its
   terminating NUL) are copied to offset off of the leader node's
   string. */
1279 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1281 struct yaz_marc_node *n;
/* find the leader node */
1283 for (n = mt->nodes; n; n = n->next)
1284 if (n->which == YAZ_MARC_LEADER)
1286 leader = n->u.leader;
/* NOTE(review): no bounds check on off + strlen(str) is visible here;
   callers must stay within the leader */
1287 memcpy(leader+off, str, strlen(str));
/* Set the leader specification of the handle. Any previous specification
   is freed first. The new one is checked by running marc_exec_leader
   against a dummy 24-byte leader, and a private copy is stored with
   xstrdup. */
1292 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1294 xfree(mt->leader_spec);
1295 mt->leader_spec = 0;
/* dry run: only the result matters, the dummy leader is discarded */
1298 char dummy_leader[24];
1299 if (marc_exec_leader(leader_spec, dummy_leader, 24))
1301 mt->leader_spec = xstrdup(leader_spec);
/* Apply a leader specification to a leader buffer of the given size.
   Each item is parsed with sscanf as "pos=value", where the value is at
   most 20 characters and ends at a comma. The position must lie inside
   the buffer. A value in single quotes is copied to leader + pos after a
   length check; a value starting with a digit is handled by a separate
   branch. */
1306 static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
1308 const char *cp = leader_spec;
1313 int no_read = 0, no = 0;
/* %n stores the number of characters consumed in no_read */
1315 no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read);
1316 if (no < 2 || no_read < 3)
1318 if (pos < 0 || (size_t) pos >= size)
/* quoted value: find the closing quote after the opening one */
1323 const char *vp = strchr(val+1, '\'');
1329 if (len + pos > size)
1331 memcpy(leader + pos, val+1, len);
1333 else if (*val >= '0' && *val <= '9')
/* Map a format name to its output format constant. The name is compared
   exactly (strcmp): "marc" -> YAZ_MARC_ISO2709, "marcxml" ->
   YAZ_MARC_MARCXML, "turbomarc" -> YAZ_MARC_TURBOMARC, "marcxchange" ->
   YAZ_MARC_XCHANGE, "line" -> YAZ_MARC_LINE. */
1349 int yaz_marc_decode_formatstr(const char *arg)
1352 if (!strcmp(arg, "marc"))
1353 mode = YAZ_MARC_ISO2709;
1354 if (!strcmp(arg, "marcxml"))
1355 mode = YAZ_MARC_MARCXML;
1356 if (!strcmp(arg, "turbomarc"))
1357 mode = YAZ_MARC_TURBOMARC;
1358 if (!strcmp(arg, "marcxchange"))
1359 mode = YAZ_MARC_XCHANGE;
1360 if (!strcmp(arg, "line"))
1361 mode = YAZ_MARC_LINE;
1365 void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
1367 mt->write_using_libxml2 = enable;
1373 * c-file-style: "Stroustrup"
1374 * indent-tabs-mode: nil
1376 * vim: shiftwidth=4 tabstop=8 expandtab