1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2013 Index Data
3 * See the file LICENSE for details.
7 * \brief MARC-8 encoding
10 * http://www.loc.gov/marc/specifications/speccharmarc8.html
21 #include <yaz/xmalloc.h>
22 #include <yaz/snprintf.h>
/* Declarations of the conversion routines used for character lookup.
 * Most of them are visibly called, one after another, by lookup_marc8();
 * their definitions are not part of this excerpt. For several of them the
 * hex suffix is the ASCII code of the last byte of the escape sequence
 * that lookup_marc8() reports for that table (e.g. 4E with ESC "(N"). */
25 yaz_conv_func_t yaz_marc8r_42_conv;
26 yaz_conv_func_t yaz_marc8r_45_conv;
27 yaz_conv_func_t yaz_marc8r_67_conv;
28 yaz_conv_func_t yaz_marc8r_62_conv;
29 yaz_conv_func_t yaz_marc8r_70_conv;
30 yaz_conv_func_t yaz_marc8r_32_conv;
31 yaz_conv_func_t yaz_marc8r_4E_conv;
32 yaz_conv_func_t yaz_marc8r_51_conv;
33 yaz_conv_func_t yaz_marc8r_33_conv;
34 yaz_conv_func_t yaz_marc8r_34_conv;
35 yaz_conv_func_t yaz_marc8r_53_conv;
36 yaz_conv_func_t yaz_marc8r_31_conv;
/* Byte written by flush_combos() after the pending character; set to 0xEC
 * or 0xFB in yaz_write_marc8_2(), 0 when there is none. */
42 unsigned write_marc8_second_half_char;
/* Pending character value: stored by yaz_write_marc8_2() and written out
 * by flush_combos(). */
43 unsigned long write_marc8_last;
/* Page escape sequence that belongs to write_marc8_last. */
45 const char *write_marc8_lpage;
/* Escape sequence last recorded for G0; init_marc8() sets it to ESC "(B". */
46 const char *write_marc8_g0;
/* Escape sequence last recorded for G1; 0 when none has been selected. */
47 const char *write_marc8_g1;
/* Reset the encoder state stored in w->data: no pending character, no
 * second-half byte, NCR flag cleared, no pending page, G0 set to
 * ESC "(B" and G1 unset. Installed as the encoder's init_handle by
 * yaz_marc8_encoder(). */
50 static void init_marc8(yaz_iconv_encoder_t w)
52 struct encoder_data *data = (struct encoder_data *) w->data;
53 data->write_marc8_second_half_char = 0;
54 data->write_marc8_last = 0;
55 data->write_marc8_ncr = 0;
56 data->write_marc8_lpage = 0;
57 data->write_marc8_g0 = ESC "(B";
58 data->write_marc8_g1 = 0;
/* Forward declaration: flush_combos() calls this function before its
 * definition further down in the file. */
61 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
62 struct encoder_data *w,
63 char **outbuf, size_t *outbytesleft,
64 const char *page_chr);
/* Look up the MARC-8 representation of character x.
 *
 * cd        conversion handle; receives YAZ_ICONV_EILSEQ on failure
 * x         character to look up
 * comb      passed through to the table conversion routines
 * page_chr  out: escape sequence reported for the table that was used
 *
 * x is first encoded as UTF-8 into a local buffer, and that buffer is then
 * offered to the yaz_marc8r_*_conv routines one after another. The tests
 * between the calls and the return statements are not visible in this
 * excerpt. */
66 static unsigned long lookup_marc8(yaz_iconv_t cd,
67 unsigned long x, int *comb,
68 const char **page_chr)
71 char *utf8_outbuf = utf8_buf;
/* One byte of utf8_buf is withheld from the UTF-8 writer; the buffer is
 * measured with strlen() below, so presumably that byte is reserved for
 * the NUL terminator. */
72 size_t utf8_outbytesleft = sizeof(utf8_buf)-1, r;
/* Values up to and including space, except ESC (27), take a separate path
 * whose body is not visible here. */
75 if (x <= ' ' && x != 27)
/* Encode x as UTF-8; a failure is reported as an illegal sequence. */
80 r = yaz_write_UTF8_char(x, &utf8_outbuf, &utf8_outbytesleft, &error_code);
81 if (r == (size_t)(-1))
83 yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ);
89 size_t inbytesleft, no_read_sub = 0;
93 inp = (unsigned char *) utf8_buf;
94 inbytesleft = strlen(utf8_buf);
/* Offer the UTF-8 bytes to each table in turn. Where an assignment to
 * *page_chr follows a call, it names the escape sequence reported for
 * that table. */
96 x = yaz_marc8r_42_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
102 x = yaz_marc8r_45_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
105 *page_chr = ESC "(B";
108 x = yaz_marc8r_62_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
114 x = yaz_marc8r_70_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
120 x = yaz_marc8r_32_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
123 *page_chr = ESC "(2";
126 x = yaz_marc8r_4E_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
129 *page_chr = ESC "(N";
132 x = yaz_marc8r_51_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
135 *page_chr = ESC "(Q";
138 x = yaz_marc8r_33_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
141 *page_chr = ESC "(3";
144 x = yaz_marc8r_34_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
147 *page_chr = ESC "(4";
150 x = yaz_marc8r_53_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
153 *page_chr = ESC "(S";
156 x = yaz_marc8r_31_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
159 *page_chr = ESC "$1";
/* Reached after the last table; presumably none of them matched. */
162 yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ);
/* Write the pending character held in w->write_marc8_last to the output
 * and clear the pending state.
 *
 * cd            conversion handle; receives YAZ_ICONV_E2BIG when the
 *               output buffer is too small
 * w             encoder state
 * outbuf        in/out: current output position
 * outbytesleft  in/out: number of free bytes at *outbuf
 *
 * Returns (size_t)(-1) when there is not enough room. Several lines of
 * the body (including the success return) are not visible in this
 * excerpt. */
167 static size_t flush_combos(yaz_iconv_t cd,
168 struct encoder_data *w,
169 char **outbuf, size_t *outbytesleft)
171 unsigned long y = w->write_marc8_last;
/* A pending character is expected to carry its page. The if repeats the
 * asserted condition, presumably for builds where assert is compiled out. */
176 assert(w->write_marc8_lpage);
177 if (w->write_marc8_lpage)
/* Switch to the page of the pending character before writing it. */
179 size_t r = yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft,
180 w->write_marc8_lpage);
/* Give up unless more than 9 bytes are free. */
185 if (9 >= *outbytesleft)
187 yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
188 return (size_t) (-1);
190 if (w->write_marc8_ncr)
/* Numeric character reference form, accounted for as 8 output bytes.
 * NOTE(review): y is unsigned long but the conversion is %04x, and a value
 * above 0xFFFF needs more than the 8 bytes accounted for here; confirm
 * against yaz_snprintf and the possible range of y. */
192 yaz_snprintf(*outbuf, 9, "&#x%04x;", y);
193 (*outbytesleft) -= 8;
/* Otherwise y is split into up to three bytes, most significant first.
 * The conditions that decide whether each byte is written are not
 * visible here. */
201 byte = (unsigned char )((y>>16) & 0xff);
203 (*outbuf)[out_no++] = byte;
204 byte = (unsigned char)((y>>8) & 0xff);
206 (*outbuf)[out_no++] = byte;
207 byte = (unsigned char )(y & 0xff);
209 (*outbuf)[out_no++] = byte;
211 (*outbytesleft) -= out_no;
/* Append the second-half byte recorded by yaz_write_marc8_2(), if any. */
214 if (w->write_marc8_second_half_char)
216 *(*outbuf)++ = w->write_marc8_second_half_char;
/* Nothing is pending any more. */
220 w->write_marc8_last = 0;
221 w->write_marc8_ncr = 0;
222 w->write_marc8_lpage = 0;
223 w->write_marc8_second_half_char = 0;
/* Write the escape sequence that selects page_chr, unless that page is
 * already the one recorded for the affected set.
 *
 * cd            conversion handle; receives YAZ_ICONV_E2BIG when the
 *               output buffer is too small
 * w             encoder state; write_marc8_g0 or write_marc8_g1 is updated
 * outbuf        in/out: current output position
 * outbytesleft  in/out: number of free bytes at *outbuf
 * page_chr      escape sequence of the page to select
 *
 * Returns (size_t)(-1) when fewer than 8 bytes are free. Several lines of
 * the body (including the success return) are not visible in this
 * excerpt. */
227 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
228 struct encoder_data *w,
229 char **outbuf, size_t *outbytesleft,
230 const char *page_chr)
/* The G0 slot is tracked unless the sequence designates G1. */
232 const char **old_page_chr = &w->write_marc8_g0;
234 /* are we going to a G1-set (such as ESC ")!E") */
235 if (page_chr && page_chr[1] == ')')
236 old_page_chr = &w->write_marc8_g1;
/* Only write something when no page is recorded yet or the page changes.
 * NOTE(review): page_chr is NULL-checked on the line above but passed to
 * strcmp unguarded here; confirm that callers never pass NULL. */
238 if (!*old_page_chr || strcmp(page_chr, *old_page_chr))
241 const char *page_out = page_chr;
243 if (*outbytesleft < 8)
245 yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
247 return (size_t) (-1);
/* Pages ESC "p", ESC "g" and ESC "b" are left in a special way; the
 * lines that change page_out for this case are not visible here. */
252 if (!strcmp(*old_page_chr, ESC "p")
253 || !strcmp(*old_page_chr, ESC "g")
254 || !strcmp(*old_page_chr, ESC "b"))
257 /* Technique 1 leave */
258 if (strcmp(page_chr, ESC "(B")) /* Not going ASCII page? */
260 /* Must leave script + enter new page */
261 plen = strlen(page_out);
262 memcpy(*outbuf, page_out, plen);
264 (*outbytesleft) -= plen;
/* Record the new page and write its escape sequence. */
269 *old_page_chr = page_chr;
270 plen = strlen(page_out);
271 memcpy(*outbuf, page_out, plen);
273 (*outbytesleft) -= plen;
/* Convert one character x and hold the result as the pending character in
 * w. Part of the parameter list and many body lines (the conditions around
 * the statements below and the success return) are not visible in this
 * excerpt.
 *
 * Returns (size_t)(-1) on failure; YAZ_ICONV_E2BIG is set when the output
 * buffer has at most one free byte. */
279 static size_t yaz_write_marc8_2(yaz_iconv_t cd, struct encoder_data *w,
281 char **outbuf, size_t *outbytesleft,
286 const char *page_chr = 0;
/* y is the looked-up value; page_chr receives the page it belongs to. */
287 unsigned long y = lookup_marc8(cd, x, &comb, &page_chr);
292 return (size_t) (-1);
307 size_t r = yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft,
/* Record the second-half byte that flush_combos() writes after the pending
 * character: 0xFB for x == 0x0360, 0xEC under a condition that is not
 * visible here. */
313 w->write_marc8_second_half_char = 0xEC;
314 else if (x == 0x0360)
315 w->write_marc8_second_half_char = 0xFB;
317 if (*outbytesleft <= 1)
319 yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
320 return (size_t) (-1);
/* Write out whatever was pending, then make y the new pending character
 * together with its page and the NCR flag. */
327 size_t r = flush_combos(cd, w, outbuf, outbytesleft);
331 w->write_marc8_last = y;
332 w->write_marc8_lpage = page_chr;
333 w->write_marc8_ncr = enable_ncr;
/* Flush handler, installed as flush_handle by yaz_marc8_encoder(): write
 * out the pending character through flush_combos(), forget the G1
 * selection and switch the output back to page ESC "(B". The handling of
 * r between the two calls is not visible in this excerpt.
 *
 * Returns the result of yaz_write_marc8_page_chr() on the visible path. */
338 static size_t flush_marc8(yaz_iconv_t cd, yaz_iconv_encoder_t en,
339 char **outbuf, size_t *outbytesleft)
341 struct encoder_data *w = (struct encoder_data *) en->data;
342 size_t r = flush_combos(cd, w, outbuf, outbytesleft);
345 w->write_marc8_g1 = 0;
346 return yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft, ESC "(B");
/* Write character x, common to all three write handlers.
 *
 * When yaz_iso_8859_1_lookup_y() yields a pair x1, x2 for x, the two are
 * written one after the other with yaz_write_marc8_2(); otherwise x is
 * written directly. loss_mode is forwarded unchanged. Part of the
 * parameter list and some body lines are not visible in this excerpt.
 *
 * Returns the result of yaz_write_marc8_2() on the visible direct path. */
349 static size_t yaz_write_marc8_generic(yaz_iconv_t cd, struct encoder_data *w,
351 char **outbuf, size_t *outbytesleft,
354 unsigned long x1, x2;
355 if (yaz_iso_8859_1_lookup_y(x, &x1, &x2))
357 /* save the output pointers .. */
358 char *outbuf0 = *outbuf;
359 size_t outbytesleft0 = *outbytesleft;
/* .. and the pending-character state, so that a failed two-part write can
 * be undone.
 * NOTE(review): write_marc8_last is unsigned long but is saved in an int;
 * confirm that no pending value exceeds the range of int. */
360 int last_ch = w->write_marc8_last;
361 int ncr = w->write_marc8_ncr;
362 const char *lpage = w->write_marc8_lpage;
365 r = yaz_write_marc8_2(cd, w, x1,
366 outbuf, outbytesleft, loss_mode);
369 r = yaz_write_marc8_2(cd, w, x2,
370 outbuf, outbytesleft, loss_mode);
371 if (r && yaz_iconv_error(cd) == YAZ_ICONV_E2BIG)
373 /* not enough room. reset output to original values */
375 *outbytesleft = outbytesleft0;
376 w->write_marc8_last = last_ch;
377 w->write_marc8_ncr = ncr;
378 w->write_marc8_lpage = lpage;
/* Direct path: write x itself. */
382 return yaz_write_marc8_2(cd, w, x, outbuf, outbytesleft, loss_mode);
/* Write handler that forwards to yaz_write_marc8_generic() with 0 as the
 * last argument (loss_mode). Selected by yaz_marc8_encoder() for "MARC8"
 * and "MARC8s". */
385 static size_t write_marc8_normal(yaz_iconv_t cd, yaz_iconv_encoder_t e,
387 char **outbuf, size_t *outbytesleft)
389 return yaz_write_marc8_generic(cd, (struct encoder_data *) e->data,
390 x, outbuf, outbytesleft, 0);
/* Write handler that forwards to yaz_write_marc8_generic() with 1 as the
 * last argument (loss_mode). Selected by yaz_marc8_encoder() for
 * "MARC8lossy". */
393 static size_t write_marc8_lossy(yaz_iconv_t cd, yaz_iconv_encoder_t e,
395 char **outbuf, size_t *outbytesleft)
397 return yaz_write_marc8_generic(cd, (struct encoder_data *) e->data,
398 x, outbuf, outbytesleft, 1);
/* Write handler that forwards to yaz_write_marc8_generic() with 2 as the
 * last argument (loss_mode). Selected by yaz_marc8_encoder() for
 * "MARC8lossless". */
401 static size_t write_marc8_lossless(yaz_iconv_t cd, yaz_iconv_encoder_t e,
403 char **outbuf, size_t *outbytesleft)
405 return yaz_write_marc8_generic(cd, (struct encoder_data *) e->data,
406 x, outbuf, outbytesleft, 2);
/* Destroy handler, installed as destroy_handle by yaz_marc8_encoder().
 * The body is not visible in this excerpt; presumably it releases the
 * encoder_data allocated there (to be confirmed). */
409 static void destroy_marc8(yaz_iconv_encoder_t e)
/* Set up encoder e for the MARC-8 variant named by tocode.
 *
 * tocode  target encoding name, compared with yaz_matchstr()
 * e       encoder object whose handles are filled in
 *
 * The write handle depends on tocode; "MARC8" and "MARC8s" both use
 * write_marc8_normal. What happens for any other name, and the return
 * statements, are not visible in this excerpt. */
414 yaz_iconv_encoder_t yaz_marc8_encoder(const char *tocode,
415 yaz_iconv_encoder_t e)
418 if (!yaz_matchstr(tocode, "MARC8"))
419 e->write_handle = write_marc8_normal;
420 else if (!yaz_matchstr(tocode, "MARC8s"))
421 e->write_handle = write_marc8_normal;
422 else if (!yaz_matchstr(tocode, "MARC8lossy"))
423 e->write_handle = write_marc8_lossy;
424 else if (!yaz_matchstr(tocode, "MARC8lossless"))
425 e->write_handle = write_marc8_lossless;
/* Allocate the per-encoder state and install the remaining handles. */
430 struct encoder_data *data = (struct encoder_data *)
431 xmalloc(sizeof(*data));
433 e->destroy_handle = destroy_marc8;
434 e->flush_handle = flush_marc8;
435 e->init_handle = init_marc8;
444 * c-file-style: "Stroustrup"
445 * indent-tabs-mode: nil
447 * vim: shiftwidth=4 tabstop=8 expandtab