1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2008 Index Data
3 * See the file LICENSE for details.
7 * \brief MARC-8 encoding
10 * http://www.loc.gov/marc/specifications/speccharmarc8.html
22 #include <yaz/xmalloc.h>
23 #include <yaz/snprintf.h>
/* Conversion routines, one per MARC-8 character set, declared through the
 * yaz_conv_func_t typedef; no definitions are visible in this excerpt.
 * lookup_marc8 calls them on the UTF-8 form of a character. For the sets
 * whose escape sequence is visible in lookup_marc8, the hex digits in the
 * routine name equal the final escape byte (e.g. 0x4E = 'N' for ESC "(N",
 * 0x31 = '1' for ESC "$1"). */
26 yaz_conv_func_t yaz_marc8r_42_conv;
27 yaz_conv_func_t yaz_marc8r_45_conv;
28 yaz_conv_func_t yaz_marc8r_67_conv;
29 yaz_conv_func_t yaz_marc8r_62_conv;
30 yaz_conv_func_t yaz_marc8r_70_conv;
31 yaz_conv_func_t yaz_marc8r_32_conv;
32 yaz_conv_func_t yaz_marc8r_4E_conv;
33 yaz_conv_func_t yaz_marc8r_51_conv;
34 yaz_conv_func_t yaz_marc8r_33_conv;
35 yaz_conv_func_t yaz_marc8r_34_conv;
36 yaz_conv_func_t yaz_marc8r_53_conv;
37 yaz_conv_func_t yaz_marc8r_31_conv;
/* Encoder state fields; init_marc8 resets them through struct encoder_data.
 * (The enclosing declaration is not visible in this excerpt.) */
/* extra byte that flush_combos writes after the pending character when non-zero */
43 unsigned write_marc8_second_half_char;
/* pending value stored by yaz_write_marc8_2 and written out by flush_combos */
44 unsigned long write_marc8_last;
/* page escape sequence stored together with write_marc8_last */
46 const char *write_marc8_lpage;
/* escape sequences last recorded by yaz_write_marc8_page_chr for G0 and G1 */
47 const char *write_marc8_g0;
48 const char *write_marc8_g1;
/* Reset the MARC-8 encoder state held in w->data: no pending character, no
 * second-half byte, NCR flag off, no pending page, G0 set to ESC "(B" and
 * G1 cleared. Installed as the init handler by yaz_marc8_encoder. */
51 static void init_marc8(yaz_iconv_encoder_t w)
53 struct encoder_data *data = w->data;
54 data->write_marc8_second_half_char = 0;
55 data->write_marc8_last = 0;
56 data->write_marc8_ncr = 0;
57 data->write_marc8_lpage = 0;
/* G0 starts out as ESC "(B", the same sequence flush_marc8 switches back to */
58 data->write_marc8_g0 = ESC "(B";
59 data->write_marc8_g1 = 0;
/* Forward declaration: flush_combos calls this function before its
 * definition appears further down in the file. */
62 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
63 struct encoder_data *w,
64 char **outbuf, size_t *outbytesleft,
65 const char *page_chr);
/* Look up the code point x by encoding it as UTF-8 and offering the result
 * to the per-character-set conversion routines one after another.
 * comb is handed through to those routines; *page_chr receives the escape
 * sequence belonging to a character set. The conditions around each
 * assignment and all return statements are not visible in this excerpt --
 * presumably the first routine that yields a non-zero value wins and that
 * value is returned; TODO confirm.
 * YAZ_ICONV_EILSEQ is set on cd when the UTF-8 encoding step fails, and
 * once more at the end of the chain (presumably when nothing matched). */
67 static unsigned long lookup_marc8(yaz_iconv_t cd,
68 unsigned long x, int *comb,
69 const char **page_chr)
72 char *utf8_outbuf = utf8_buf;
/* one byte of utf8_buf is withheld from the writer; strlen() is applied to the buffer below */
73 size_t utf8_outbytesleft = sizeof(utf8_buf)-1, r;
/* step 1: encode x as UTF-8 into the local buffer */
76 r = yaz_write_UTF8_char(x, &utf8_outbuf, &utf8_outbytesleft, &error_code);
77 if (r == (size_t)(-1))
79 yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ);
85 size_t inbytesleft, no_read_sub = 0;
/* step 2: treat the UTF-8 bytes as input for the conversion routines */
89 inp = (unsigned char *) utf8_buf;
90 inbytesleft = strlen(utf8_buf);
/* every routine below is called with the same arguments; only the table differs */
92 x = yaz_marc8r_42_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
98 x = yaz_marc8r_45_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
101 *page_chr = ESC "(B";
104 x = yaz_marc8r_62_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
110 x = yaz_marc8r_70_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
116 x = yaz_marc8r_32_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
119 *page_chr = ESC "(2";
122 x = yaz_marc8r_4E_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
125 *page_chr = ESC "(N";
128 x = yaz_marc8r_51_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
131 *page_chr = ESC "(Q";
134 x = yaz_marc8r_33_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
137 *page_chr = ESC "(3";
140 x = yaz_marc8r_34_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
143 *page_chr = ESC "(4";
146 x = yaz_marc8r_53_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
149 *page_chr = ESC "(S";
/* the only set here whose escape sequence starts with '$' rather than '(' */
152 x = yaz_marc8r_31_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
155 *page_chr = ESC "$1";
/* end of the chain: report an illegal sequence */
158 yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ);
/* Write out the pending character kept in w->write_marc8_last, switching to
 * its page (w->write_marc8_lpage) first, then clear the pending state.
 * The character is written either as a numeric character reference
 * (when w->write_marc8_ncr is set) or as bytes taken from the value, high
 * byte first; w->write_marc8_second_half_char, if non-zero, is appended.
 * Sets YAZ_ICONV_E2BIG on cd and returns (size_t)(-1) when the output space
 * check fails. Other return statements and several conditions are not
 * visible in this excerpt. */
163 static size_t flush_combos(yaz_iconv_t cd,
164 struct encoder_data *w,
165 char **outbuf, size_t *outbytesleft)
167 unsigned long y = w->write_marc8_last;
/* the assert documents the expectation; the test that follows still guards
 * the call when assertions are compiled out (NDEBUG) */
172 assert(w->write_marc8_lpage);
173 if (w->write_marc8_lpage)
175 size_t r = yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft,
176 w->write_marc8_lpage);
/* more than 9 bytes of output space must remain before anything is written */
181 if (9 >= *outbytesleft)
183 yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
184 return (size_t) (-1);
186 if (w->write_marc8_ncr)
/* NCR form: "&#x" + 4 hex digits + ";" is 8 characters, matching the 8
 * subtracted below.
 * NOTE(review): y is unsigned long but the format is %04x, and a value
 * needing more than 4 hex digits would not fit the 9-byte limit -- confirm
 * how yaz_snprintf treats both cases. */
188 yaz_snprintf(*outbuf, 9, "&#x%04x;", y);
189 (*outbytesleft) -= 8;
/* byte form: bits 16-23, then 8-15, then 0-7 of y (any conditions guarding
 * the individual stores are not visible here) */
197 byte = (unsigned char )((y>>16) & 0xff);
199 (*outbuf)[out_no++] = byte;
200 byte = (unsigned char)((y>>8) & 0xff);
202 (*outbuf)[out_no++] = byte;
203 byte = (unsigned char )(y & 0xff);
205 (*outbuf)[out_no++] = byte;
207 (*outbytesleft) -= out_no;
/* append the second-half byte set up by yaz_write_marc8_2, if any */
210 if (w->write_marc8_second_half_char)
212 *(*outbuf)++ = w->write_marc8_second_half_char;
/* nothing is pending any more */
216 w->write_marc8_last = 0;
217 w->write_marc8_ncr = 0;
218 w->write_marc8_lpage = 0;
219 w->write_marc8_second_half_char = 0;
/* Switch the output to the page identified by the escape sequence page_chr.
 * The sequence is compared against the one currently recorded for G0 (or
 * for G1 when page_chr[1] is ')'); only when they differ, or nothing is
 * recorded, is an escape sequence copied to *outbuf and the record updated.
 * Sets YAZ_ICONV_E2BIG on cd and returns (size_t)(-1) when fewer than 8
 * output bytes remain. Other return statements, the lines that advance
 * *outbuf, and some conditions are not visible in this excerpt. */
223 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
224 struct encoder_data *w,
225 char **outbuf, size_t *outbytesleft,
226 const char *page_chr)
/* default: the sequence is tracked in the G0 slot */
228 const char **old_page_chr = &w->write_marc8_g0;
230 /* are we going to a G1-set (such as ESC ")!E") */
231 if (page_chr && page_chr[1] == ')')
232 old_page_chr = &w->write_marc8_g1;
/* NOTE(review): page_chr is null-checked above but passed to strcmp here
 * without a check -- confirm callers never pass NULL */
234 if (!*old_page_chr || strcmp(page_chr, *old_page_chr))
237 const char *page_out = page_chr;
239 if (*outbytesleft < 8)
241 yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
243 return (size_t) (-1);
/* special handling when the page being left is ESC "p", ESC "g" or ESC "b".
 * NOTE(review): *old_page_chr can be NULL on this path (see the test above);
 * a guard is presumably on a line not visible here -- confirm */
248 if (!strcmp(*old_page_chr, ESC "p")
249 || !strcmp(*old_page_chr, ESC "g")
250 || !strcmp(*old_page_chr, ESC "b"))
253 /* Technique 1 leave */
254 if (strcmp(page_chr, ESC "(B")) /* Not going ASCII page? */
256 /* Must leave script + enter new page */
257 plen = strlen(page_out);
258 memcpy(*outbuf, page_out, plen);
260 (*outbytesleft) -= plen;
/* remember the new page for this slot, then emit its escape sequence */
265 *old_page_chr = page_chr;
266 plen = strlen(page_out);
267 memcpy(*outbuf, page_out, plen);
269 (*outbytesleft) -= plen;
/* Convert x with lookup_marc8 and queue the result as the pending character
 * in w, flushing whatever was pending before. Nothing for x itself is
 * written here on the visible queueing path; flush_combos writes it later.
 * Sets YAZ_ICONV_E2BIG on cd and returns (size_t)(-1) when no more than one
 * output byte remains. Parts of the signature and body (including the
 * conditions around several statements below) are not visible in this
 * excerpt. */
275 static size_t yaz_write_marc8_2(yaz_iconv_t cd, struct encoder_data *w,
277 char **outbuf, size_t *outbytesleft,
282 const char *page_chr = 0;
283 unsigned long y = lookup_marc8(cd, x, &comb, &page_chr);
/* error return; presumably taken when the lookup yields nothing -- TODO confirm */
288 return (size_t) (-1);
/* page switch for the looked-up character; the enclosing branch is not visible */
303 size_t r = yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft,
/* certain code points (0x0360 is the visible one) register a second-half
 * byte that flush_combos appends after the pending character */
309 w->write_marc8_second_half_char = 0xEC;
310 else if (x == 0x0360)
311 w->write_marc8_second_half_char = 0xFB;
313 if (*outbytesleft <= 1)
315 yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
316 return (size_t) (-1);
/* write out the previously queued character before replacing it */
323 size_t r = flush_combos(cd, w, outbuf, outbytesleft);
/* queue the new value together with its page and NCR flag */
327 w->write_marc8_last = y;
328 w->write_marc8_lpage = page_chr;
329 w->write_marc8_ncr = enable_ncr;
/* Flush handler installed by yaz_marc8_encoder: writes out any pending
 * character via flush_combos, forgets the recorded G1 page and switches the
 * output back to ESC "(B". Returns the result of that final page switch
 * (handling of the flush_combos result is not visible in this excerpt). */
334 static size_t flush_marc8(yaz_iconv_t cd, yaz_iconv_encoder_t en,
335 char **outbuf, size_t *outbytesleft)
337 struct encoder_data *w = en->data;
338 size_t r = flush_combos(cd, w, outbuf, outbytesleft);
341 w->write_marc8_g1 = 0;
342 return yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft, ESC "(B");
/* Write code point x in MARC-8. When yaz_iso_8859_1_lookup_y splits x into
 * the pair x1/x2, both parts are written with yaz_write_marc8_2; if that
 * fails with YAZ_ICONV_E2BIG the saved output position and pending-character
 * state are restored. Otherwise x is written directly with
 * yaz_write_marc8_2. loss_mode is forwarded unchanged. Parts of the
 * signature and body are not visible in this excerpt. */
345 static size_t yaz_write_marc8_generic(yaz_iconv_t cd, struct encoder_data *w,
347 char **outbuf, size_t *outbytesleft,
350 unsigned long x1, x2;
351 if (yaz_iso_8859_1_lookup_y(x, &x1, &x2))
353 /* save the output pointers .. */
354 char *outbuf0 = *outbuf;
355 size_t outbytesleft0 = *outbytesleft;
/* NOTE(review): write_marc8_last is assigned from an unsigned long elsewhere
 * in this file but is saved in an int here -- confirm values always fit */
356 int last_ch = w->write_marc8_last;
357 int ncr = w->write_marc8_ncr;
358 const char *lpage = w->write_marc8_lpage;
361 r = yaz_write_marc8_2(cd, w, x1,
362 outbuf, outbytesleft, loss_mode);
365 r = yaz_write_marc8_2(cd, w, x2,
366 outbuf, outbytesleft, loss_mode);
367 if (r && yaz_iconv_error(cd) == YAZ_ICONV_E2BIG)
369 /* not enough room. reset output to original values */
371 *outbytesleft = outbytesleft0;
372 w->write_marc8_last = last_ch;
373 w->write_marc8_ncr = ncr;
374 w->write_marc8_lpage = lpage;
/* no split available: write x as a single unit */
378 return yaz_write_marc8_2(cd, w, x, outbuf, outbytesleft, loss_mode);
/* Write handler selected for "MARC8" and "MARC8s": forwards to
 * yaz_write_marc8_generic with 0 as the final (loss mode) argument. */
381 static size_t write_marc8_normal(yaz_iconv_t cd, yaz_iconv_encoder_t e,
383 char **outbuf, size_t *outbytesleft)
385 return yaz_write_marc8_generic(cd, e->data, x, outbuf, outbytesleft, 0);
/* Write handler selected for "MARC8lossy": forwards to
 * yaz_write_marc8_generic with 1 as the final (loss mode) argument. */
388 static size_t write_marc8_lossy(yaz_iconv_t cd, yaz_iconv_encoder_t e,
390 char **outbuf, size_t *outbytesleft)
392 return yaz_write_marc8_generic(cd, e->data, x, outbuf, outbytesleft, 1);
/* Write handler selected for "MARC8lossless": forwards to
 * yaz_write_marc8_generic with 2 as the final (loss mode) argument. */
395 static size_t write_marc8_lossless(yaz_iconv_t cd, yaz_iconv_encoder_t e,
397 char **outbuf, size_t *outbytesleft)
399 return yaz_write_marc8_generic(cd, e->data, x, outbuf, outbytesleft, 2);
/* Destroy handler installed by yaz_marc8_encoder. The body is not visible
 * in this excerpt; presumably it releases e->data -- TODO confirm. */
402 static void destroy_marc8(yaz_iconv_encoder_t e)
/* Set up e as a MARC-8 encoder when tocode names a supported variant:
 * "MARC8" and "MARC8s" both select write_marc8_normal, "MARC8lossy" selects
 * write_marc8_lossy and "MARC8lossless" selects write_marc8_lossless.
 * Encoder state is allocated with xmalloc and the destroy, flush and init
 * handlers are installed. The branch taken for any other tocode and the
 * return statements are not visible in this excerpt. */
407 yaz_iconv_encoder_t yaz_marc8_encoder(const char *tocode,
408 yaz_iconv_encoder_t e)
/* a zero result from yaz_matchstr is treated as a match */
411 if (!yaz_matchstr(tocode, "MARC8"))
412 e->write_handle = write_marc8_normal;
413 else if (!yaz_matchstr(tocode, "MARC8s"))
414 e->write_handle = write_marc8_normal;
415 else if (!yaz_matchstr(tocode, "MARC8lossy"))
416 e->write_handle = write_marc8_lossy;
417 else if (!yaz_matchstr(tocode, "MARC8lossless"))
418 e->write_handle = write_marc8_lossless;
/* per-encoder state; its fields are given their values by init_marc8 */
423 struct encoder_data *data = xmalloc(sizeof(*data));
425 e->destroy_handle = destroy_marc8;
426 e->flush_handle = flush_marc8;
427 e->init_handle = init_marc8;
436 * indent-tabs-mode: nil
438 * vim: shiftwidth=4 tabstop=8 expandtab