1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) Index Data
3 * See the file LICENSE for details.
9 #include <yaz/record_conv.h>
11 #include <yaz/wrbuf.h>
14 #include <yaz/proto.h>
15 #include <yaz/prt-ext.h>
16 #include <yaz/oid_db.h>
19 #include <libxml/parser.h>
20 #include <libxml/tree.h>
23 #include <libxslt/xslt.h>
/* Build a record converter from the XML configuration held in xmlstring.
 * The text is parsed with xmlParseMemory() and the root element of the
 * resulting document is handed to yaz_record_conv_configure().
 * The literals "xmlParseMemory", "xmlDocGetRootElement" and
 * "yaz_record_conv_create", or the text from yaz_record_conv_get_error(),
 * are written to w.
 * NOTE(review): the conditions guarding those writes and the return
 * statements are not visible in this excerpt; presumably each message names
 * the step that failed - confirm against the full file. */
26 yaz_record_conv_t conv_configure(const char *xmlstring, WRBUF w)
28 xmlDocPtr doc = xmlParseMemory(xmlstring, strlen(xmlstring));
31 wrbuf_printf(w, "xmlParseMemory");
36 xmlNodePtr ptr = xmlDocGetRootElement(doc);
37 yaz_record_conv_t p = yaz_record_conv_create();
/* The srcdir environment variable is passed to the converter as its path. */
41 const char *srcdir = getenv("srcdir");
43 yaz_record_conv_set_path(p, srcdir);
47 wrbuf_printf(w, "xmlDocGetRootElement");
48 yaz_record_conv_destroy(p);
53 wrbuf_printf(w, "yaz_record_conv_create");
59 int r = yaz_record_conv_configure(p, ptr);
/* The converter's own error text goes to w before the handle is destroyed. */
63 wrbuf_puts(w, yaz_record_conv_get_error(p));
64 yaz_record_conv_destroy(p);
/* Run conv_configure() on xmlstring with a freshly allocated message buffer.
 * When expect_error is non-NULL the buffer contents are compared against it
 * with strcmp(); the buffer contents are also printed with printf().
 * NOTE(review): how pt is used and which value is returned is not visible
 * in this excerpt - confirm against the full file. */
73 int conv_configure_test(const char *xmlstring, const char *expect_error,
74 yaz_record_conv_t *pt)
76 WRBUF w = wrbuf_alloc();
79 yaz_record_conv_t p = conv_configure(xmlstring, w);
83 if (expect_error && !strcmp(wrbuf_cstr(w), expect_error))
88 printf("%s\n", wrbuf_cstr(w));
103 yaz_record_conv_destroy(p);
/* Configuration tests: each YAZ_CHECK feeds a <backend> configuration string
 * to conv_configure_test() together with the message text expected for it. */
109 static void tst_configure(void)
/* Text that is not well-formed XML is expected to yield "xmlParseMemory". */
114 YAZ_CHECK(conv_configure_test("<bad", "xmlParseMemory", 0));
117 YAZ_CHECK(conv_configure_test("<backend syntax='usmarc' name='F'>"
119 "Element <backend>: expected <marc> or "
120 "<xslt> element, got <bad>", 0));
123 YAZ_CHECK(conv_configure_test("<backend syntax='usmarc' name='F'>"
124 "<xslt stylesheet=\"test_record_conv.xsl\"/>"
126 " inputcharset=\"marc-8\""
127 " outputcharset=\"marc-8\""
130 "Element <marc>: attribute 'inputformat' "
132 YAZ_CHECK(conv_configure_test("<backend syntax='usmarc' name='F'>"
135 "Element <xslt>: attribute 'stylesheet' "
137 YAZ_CHECK(conv_configure_test("<backend syntax='usmarc' name='F'>"
139 " inputcharset=\"utf-8\""
140 " outputcharset=\"marc-8\""
141 " inputformat=\"xml\""
142 " outputformat=\"marc\""
144 "<xslt stylesheet=\"test_record_conv.xsl\"/>"
148 YAZ_CHECK(conv_configure_test("<backend syntax='usmarc' name='F'>"
149 "<xslt stylesheet=\"test_record_conv.xsl\"/>"
152 " YAZ compiled without XSLT support", 0));
/* Convert input_record with converter p via yaz_record_conv_record() into a
 * temporary WRBUF and compare the result with output_expect_record using
 * strcmp(). The converter's error text, and on a mismatch both the produced
 * and the expected record with their lengths, are printed with printf().
 * The temporary buffer is destroyed before leaving.
 * NOTE(review): the branches selecting between these paths and the return
 * value are not fully visible in this excerpt - confirm against the full
 * file. */
156 static int conv_convert_test(yaz_record_conv_t p,
157 const char *input_record,
158 const char *output_expect_record)
167 WRBUF output_record = wrbuf_alloc();
168 int r = yaz_record_conv_record(p, input_record, strlen(input_record),
172 if (output_expect_record)
174 printf("yaz_record_conv error=%s\n",
175 yaz_record_conv_get_error(p));
183 if (!output_expect_record)
187 else if (strcmp(output_expect_record, wrbuf_cstr(output_record)))
190 printf("got-output_record len=%ld: %s\n",
191 (long) wrbuf_len(output_record),
192 wrbuf_cstr(output_record));
193 printf("output_expect_record len=%ld %s\n",
194 (long) strlen(output_expect_record),
195 output_expect_record);
202 wrbuf_destroy(output_record);
/* Call conv_convert_test() with the same converter, input and expected
 * output num_iter times, storing each result in ret.
 * NOTE(review): the remaining parameter declaration, any early exit from
 * the loop and the return statement are not visible in this excerpt. */
207 static int conv_convert_test_iter(yaz_record_conv_t p,
208 const char *input_record,
209 const char *output_expect_record,
214 for (i = 0; i < num_iter; i++)
216 ret = conv_convert_test(p, input_record, output_expect_record);
/* Conversion tests on one small MARC record held in several encodings:
 * MARCXML, turbo MARCXML, ISO2709, an ISO2709 variant with textual
 * separators, and the MARCXML wrapped in a <raw> element. Each section
 * configures a converter with conv_configure_test(), checks one or more
 * conversions with conv_convert_test() and destroys the converter. */
223 static void tst_convert1(void)
225 yaz_record_conv_t p = 0;
226 const char *marcxml_rec =
227 "<record xmlns=\"http://www.loc.gov/MARC21/slim\">\n"
228 " <leader>00080nam a22000498a 4500</leader>\n"
229 " <controlfield tag=\"001\"> 11224466 </controlfield>\n"
230 " <datafield tag=\"010\" ind1=\" \" ind2=\" \">\n"
231 " <subfield code=\"a\"> 11224466 </subfield>\n"
/* Record written in the turboxml namespace with short element names. */
234 const char *tmarcxml_rec =
235 "<r xmlns=\"http://www.indexdata.com/MARC21/turboxml\">\n"
236 " <l>00080nam a22000498a 4500</l>\n"
237 " <c001> 11224466 </c001>\n"
238 " <d010 i1=\" \" i2=\" \">\n"
239 " <sa> 11224466 </sa>\n"
242 const char *iso2709_rec =
243 "\x30\x30\x30\x38\x30\x6E\x61\x6D\x20\x61\x32\x32\x30\x30\x30\x34"
244 "\x39\x38\x61\x20\x34\x35\x30\x30\x30\x30\x31\x30\x30\x31\x33\x30"
245 "\x30\x30\x30\x30\x30\x31\x30\x30\x30\x31\x37\x30\x30\x30\x31\x33"
246 "\x1E\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20\x1E\x20\x20"
247 "\x1F\x61\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20\x1E\x1D";
/* Same bytes as iso2709_rec except that the separators 0x1E, 0x1F and 0x1D
   are spelled as the text "#30;", "#31;" and "#29;". */
249 const char *solrmarc_rec =
250 "\x30\x30\x30\x38\x30\x6E\x61\x6D\x20\x61\x32\x32\x30\x30\x30\x34"
251 "\x39\x38\x61\x20\x34\x35\x30\x30\x30\x30\x31\x30\x30\x31\x33\x30"
252 "\x30\x30\x30\x30\x30\x31\x30\x30\x30\x31\x37\x30\x30\x30\x31\x33"
253 "#30;\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20#30;\x20\x20"
254 "#31;\x61\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20#30;#29;";
255 const char *raw_rec = /* raw is xml-string of marcxml_rec */
256 "<raw><record xmlns=\"http://www.loc.gov/MARC21/slim\">\n"
257 " <leader>00080nam a22000498a 4500</leader>\n"
258 " <controlfield tag=\"001\"> 11224466 </controlfield>\n"
259 " <datafield tag=\"010\" ind1=\" \" ind2=\" \">\n"
260 " <subfield code=\"a\"> 11224466 </subfield>\n"
262 "</record>\n</raw>\n";
/* XML in, MARC out: both XML flavours are expected to give iso2709_rec. */
264 YAZ_CHECK(conv_configure_test("<backend>"
266 " inputcharset=\"utf-8\""
267 " outputcharset=\"marc-8\""
268 " inputformat=\"xml\""
269 " outputformat=\"marc\""
273 YAZ_CHECK(conv_convert_test(p, marcxml_rec, iso2709_rec));
274 YAZ_CHECK(conv_convert_test(p, tmarcxml_rec, iso2709_rec));
275 yaz_record_conv_destroy(p);
/* MARC in, MARCXML out. */
277 YAZ_CHECK(conv_configure_test("<backend>"
279 " outputcharset=\"utf-8\""
280 " inputcharset=\"marc-8\""
281 " outputformat=\"marcxml\""
282 " inputformat=\"marc\""
286 YAZ_CHECK(conv_convert_test(p, iso2709_rec, marcxml_rec));
287 yaz_record_conv_destroy(p);
/* The record with textual separators is expected to give the same MARCXML. */
289 YAZ_CHECK(conv_configure_test("<backend>"
292 " outputcharset=\"utf-8\""
293 " inputcharset=\"marc-8\""
294 " outputformat=\"marcxml\""
295 " inputformat=\"marc\""
299 YAZ_CHECK(conv_convert_test(p, solrmarc_rec, marcxml_rec));
300 yaz_record_conv_destroy(p);
/* Chain of two XSLT steps followed by XML-to-MARC and MARC-to-MARCXML; the
   record is expected to come out unchanged. */
302 YAZ_CHECK(conv_configure_test("<backend>"
303 "<xslt stylesheet=\"test_record_conv.xsl\"/>"
304 "<xslt stylesheet=\"test_record_conv.xsl\"/>"
306 " inputcharset=\"utf-8\""
307 " outputcharset=\"marc-8\""
308 " inputformat=\"xml\""
309 " outputformat=\"marc\""
312 " outputcharset=\"utf-8\""
313 " inputcharset=\"marc-8\""
314 " outputformat=\"marcxml\""
315 " inputformat=\"marc\""
319 YAZ_CHECK(conv_convert_test(p, marcxml_rec, marcxml_rec));
320 yaz_record_conv_destroy(p);
/* A second chain of the same shape, again expected to return the input. */
323 YAZ_CHECK(conv_configure_test("<backend>"
324 "<xslt stylesheet=\"test_record_conv.xsl\"/>"
325 "<xslt stylesheet=\"test_record_conv.xsl\"/>"
327 " outputcharset=\"marc-8\""
328 " inputformat=\"xml\""
329 " outputformat=\"marc\""
332 " inputcharset=\"marc-8\""
333 " outputformat=\"marcxml\""
334 " inputformat=\"marc\""
338 YAZ_CHECK(conv_convert_test(p, marcxml_rec, marcxml_rec));
339 yaz_record_conv_destroy(p);
/* <select path="/raw"/>: the wrapped record is expected to give marcxml_rec. */
341 YAZ_CHECK(conv_configure_test("<backend>"
342 "<select path=\"/raw\"/>"
345 YAZ_CHECK(conv_convert_test(p, raw_rec, marcxml_rec));
346 yaz_record_conv_destroy(p);
/* Character-set conversion test: a MARCXML record whose subfield holds a
 * non-ASCII character is converted from utf-8 XML to marc-8 MARC and
 * compared with iso2709_rec. The conversion is repeated 100 times on the
 * same converter through conv_convert_test_iter(). */
349 static void tst_convert2(void)
351 yaz_record_conv_t p = 0;
352 const char *marcxml_rec =
353 "<record xmlns=\"http://www.loc.gov/MARC21/slim\">\n"
354 " <leader>00080nam a22000498a 4500</leader>\n"
355 " <controlfield tag=\"001\"> 11224466 </controlfield>\n"
356 " <datafield tag=\"010\" ind1=\" \" ind2=\" \">\n"
357 " <subfield code=\"a\">københavn</subfield>\n"
/* Expected output; the subfield is the bytes 'k' 0xb2 "benhavn". */
360 const char *iso2709_rec =
361 "\x30\x30\x30\x37\x37\x6E\x61\x6D\x20\x61\x32\x32\x30\x30\x30\x34"
362 "\x39\x38\x61\x20\x34\x35\x30\x30\x30\x30\x31\x30\x30\x31\x33\x30"
363 "\x30\x30\x30\x30\x30\x31\x30\x30\x30\x31\x34\x30\x30\x30\x31\x33"
364 "\x1E\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20\x1E\x20\x20"
365 "\x1F\x61\x6b\xb2\x62\x65\x6e\x68\x61\x76\x6e\x1E\x1D";
367 YAZ_CHECK(conv_configure_test("<backend>"
369 " inputcharset=\"utf-8\""
370 " outputcharset=\"marc-8\""
371 " inputformat=\"xml\""
372 " outputformat=\"marc\""
376 YAZ_CHECK(conv_convert_test_iter(p, marcxml_rec, iso2709_rec, 100));
377 yaz_record_conv_destroy(p);
/* OPAC record tests in two directions.
 * First a Z_OPACRecord is built by hand in nmem (a MARC bibliographic
 * record plus one holdings record with volumes and circulation data),
 * converted with yaz_record_conv_opac_record() and compared with
 * opacxml_rec. Then opacxml_rec is parsed back with yaz_xml_to_opac(),
 * re-encoded with yaz_opac_decode_wrbuf() and compared with opacxml_rec
 * again.
 * NOTE(review): several declarations, conditions and checks are not
 * visible in this excerpt. */
380 static void tst_convert3(void)
382 NMEM nmem = nmem_create();
384 yaz_record_conv_t p = 0;
386 const char *iso2709_rec =
387 "\x30\x30\x30\x37\x37\x6E\x61\x6D\x20\x20\x32\x32\x30\x30\x30\x34"
388 "\x39\x38\x61\x20\x34\x35\x30\x30\x30\x30\x31\x30\x30\x31\x33\x30"
389 "\x30\x30\x30\x30\x30\x31\x30\x30\x30\x31\x34\x30\x30\x30\x31\x33"
390 "\x1E\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20\x1E\x20\x20"
391 "\x1F\x61\x6b\xb2\x62\x65\x6e\x68\x61\x76\x6e\x1E\x1D";
/* Expected XML rendering of the OPAC record built below. */
393 const char *opacxml_rec =
395 " <bibliographicRecord>\n"
396 "<record xmlns=\"http://www.loc.gov/MARC21/slim\">\n"
397 " <leader>00077nam a22000498a 4500</leader>\n"
398 " <controlfield tag=\"001\"> 11224466 </controlfield>\n"
399 " <datafield tag=\"010\" ind1=\" \" ind2=\" \">\n"
400 " <subfield code=\"a\">k" "\xc3" "\xb8" /* oslash in UTF_8 */
401 "benhavn</subfield>\n"
404 " </bibliographicRecord>\n"
407 " <typeOfRecord>u</typeOfRecord>\n"
408 " <encodingLevel>U</encodingLevel>\n"
409 " <receiptAcqStatus>0</receiptAcqStatus>\n"
410 " <dateOfReport>000000</dateOfReport>\n"
411 " <nucCode>s-FM/GC</nucCode>\n"
412 " <localLocation>Main or Science/Business Reading Rms - STORED OFFSITE</localLocation>\n"
413 " <callNumber>MLCM 89/00602 (N)</callNumber>\n"
414 " <shelvingData>FT MEADE</shelvingData>\n"
415 " <copyNumber>Copy 1</copyNumber>\n"
418 " <enumeration>1</enumeration>\n"
419 " <chronology>2</chronology>\n"
420 " <enumAndChron>3</enumAndChron>\n"
423 " <enumeration>1</enumeration>\n"
424 " <chronology>2</chronology>\n"
425 " <enumAndChron>3</enumAndChron>\n"
430 " <availableNow value=\"1\"/>\n"
431 " <availabilityDate>20130129</availabilityDate>\n"
432 " <itemId>1226176</itemId>\n"
433 " <renewable value=\"0\"/>\n"
434 " <onHold value=\"0\"/>\n"
/* Everything below is allocated from nmem. */
441 Z_OPACRecord *z_opac = nmem_malloc(nmem, sizeof(*z_opac));
442 Z_HoldingsAndCircData *h;
/* The bibliographic part is iso2709_rec wrapped as a USMARC external. */
445 z_opac->bibliographicRecord =
446 z_ext_record_oid_nmem(nmem, yaz_oid_recsyn_usmarc,
447 iso2709_rec, strlen(iso2709_rec));
448 z_opac->num_holdingsData = 1;
449 z_opac->holdingsData = (Z_HoldingsRecord **)
450 nmem_malloc(nmem, sizeof(Z_HoldingsRecord *) * 1);
451 z_opac->holdingsData[0] = (Z_HoldingsRecord *)
452 nmem_malloc(nmem, sizeof(Z_HoldingsRecord));
453 z_opac->holdingsData[0]->which = Z_HoldingsRecord_holdingsAndCirc;
454 h = z_opac->holdingsData[0]->u.holdingsAndCirc = (Z_HoldingsAndCircData *)
455 nmem_malloc(nmem, sizeof(*h));
456 h->typeOfRecord = nmem_strdup(nmem, "u");
457 h->encodingLevel = nmem_strdup(nmem, "U");
459 h->receiptAcqStatus = nmem_strdup(nmem, "0");
460 h->generalRetention = 0;
462 h->dateOfReport = nmem_strdup(nmem, "000000");
463 h->nucCode = nmem_strdup(nmem, "s-FM/GC");
464 h->localLocation = nmem_strdup(nmem,
465 "Main or Science/Business Reading "
466 "Rms - STORED OFFSITE");
467 h->shelvingLocation = 0;
468 h->callNumber = nmem_strdup(nmem, "MLCM 89/00602 (N)");
469 h->shelvingData = nmem_strdup(nmem, "FT MEADE");
470 h->copyNumber = nmem_strdup(nmem, "Copy 1");
472 h->reproductionNote = 0;
473 h->termsUseRepro = 0;
478 h->volumes = (Z_Volume **)
479 nmem_malloc(nmem, 2 * sizeof(Z_Volume *));
481 h->volumes[0] = (Z_Volume *)
482 nmem_malloc(nmem, sizeof(Z_Volume));
/* Both slots point at the same Z_Volume; the expected XML accordingly lists
   the values 1/2/3 twice. */
483 h->volumes[1] = h->volumes[0];
485 h->volumes[0]->enumeration = nmem_strdup(nmem, "1");
486 h->volumes[0]->chronology = nmem_strdup(nmem, "2");
487 h->volumes[0]->enumAndChron = nmem_strdup(nmem, "3");
489 h->num_circulationData = 1;
490 h->circulationData = (Z_CircRecord **)
491 nmem_malloc(nmem, 1 * sizeof(Z_CircRecord *));
492 circ = h->circulationData[0] = (Z_CircRecord *)
493 nmem_malloc(nmem, sizeof(Z_CircRecord));
494 circ->availableNow = nmem_booldup(nmem, 1);
495 circ->availablityDate = nmem_strdup(nmem, "20130129");
496 circ->availableThru = 0;
497 circ->restrictions = 0;
498 circ->itemId = nmem_strdup(nmem, "1226176");
499 circ->renewable = nmem_booldup(nmem, 0);
500 circ->onHold = nmem_booldup(nmem, 0);
501 circ->enumAndChron = 0;
503 circ->temporaryLocation = 0;
/* Converter for the bibliographic part: marc-8 MARC in, utf-8 MARCXML out. */
505 YAZ_CHECK(conv_configure_test("<backend>"
507 " inputcharset=\"marc-8\""
508 " outputcharset=\"utf-8\""
509 " inputformat=\"marc\""
510 " outputformat=\"marcxml\""
517 WRBUF output_record = wrbuf_alloc();
518 ret = yaz_record_conv_opac_record(p, z_opac, output_record);
522 ret = strcmp(wrbuf_cstr(output_record), opacxml_rec);
526 printf("got-output_record len=%ld: %s\n",
527 (long) wrbuf_len(output_record),
528 wrbuf_cstr(output_record));
529 printf("output_expect_record len=%ld %s\n",
530 (long) strlen(opacxml_rec),
534 yaz_record_conv_destroy(p);
535 wrbuf_destroy(output_record);
/* Reverse direction: parse the expected XML back into an OPAC record. */
538 Z_OPACRecord *opac = 0;
539 yaz_marc_t mt = yaz_marc_create();
540 ret = yaz_xml_to_opac(mt, opacxml_rec, strlen(opacxml_rec),
541 &opac, 0 /* iconv */, nmem, 0);
547 WRBUF output_record = wrbuf_alloc();
550 yaz_marc_xml(mt, YAZ_MARC_MARCXML);
551 yaz_opac_decode_wrbuf(mt, opac, output_record);
553 /* change MARC size to 00077 from 00078, due to
554 encoding of the oslash (two bytes in UTF-8) */
555 p = strstr(wrbuf_buf(output_record), "00078");
560 ret = strcmp(wrbuf_cstr(output_record), opacxml_rec);
564 printf("got-output_record len=%ld: %s\n",
565 (long) wrbuf_len(output_record),
566 wrbuf_cstr(output_record));
567 printf("output_expect_record len=%ld %s\n",
568 (long) strlen(opacxml_rec),
571 wrbuf_destroy(output_record);
573 yaz_marc_destroy(mt);
/* Parse an OPAC XML document with yaz_xml_to_opac(), allocating from nmem.
 * The holdings elements visible here are <shelvingLocation> and
 * <callNumber>.
 * NOTE(review): the checks made on ret and opac are not visible in this
 * excerpt. */
578 static void tst_convert4(void)
580 NMEM nmem = nmem_create();
583 const char *opacxml_rec =
585 " <bibliographicRecord>\n"
586 "<record xmlns=\"http://www.loc.gov/MARC21/slim\">\n"
587 " <leader>00077nam a22000498a 4500</leader>\n"
588 " <controlfield tag=\"001\"> 11224466 </controlfield>\n"
589 " <datafield tag=\"010\" ind1=\" \" ind2=\" \">\n"
590 " <subfield code=\"a\">k" "\xc3" "\xb8" /* oslash in UTF_8 */
591 "benhavn</subfield>\n"
594 " </bibliographicRecord>\n"
597 " <shelvingLocation>Sprague Library hidden basement</shelvingLocation>\n"
598 " <callNumber>E98.L7L44 1976 </callNumber>\n"
605 Z_OPACRecord *opac = 0;
606 yaz_marc_t mt = yaz_marc_create();
607 ret = yaz_xml_to_opac(mt, opacxml_rec, strlen(opacxml_rec),
608 &opac, 0 /* iconv */, nmem, 0);
611 yaz_marc_destroy(mt);
/* Test driver entry point: initialises the YAZ_CHECK framework, calls
 * yaz_log_xml_errors(0, 0) to disable that logging, and calls
 * xsltCleanupGlobals().
 * NOTE(review): the calls to the individual tst_* functions and the
 * termination of the test run are not visible in this excerpt. */
617 int main(int argc, char **argv)
619 YAZ_CHECK_INIT(argc, argv);
620 yaz_log_xml_errors(0, 0 /* disable log */);
629 xsltCleanupGlobals();
640 * c-file-style: "Stroustrup"
641 * indent-tabs-mode: nil
643 * vim: shiftwidth=4 tabstop=8 expandtab