From daa259b6e86d28f72c82488ee9b0f7bb2ce52b8c Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Thu, 21 Dec 2006 20:50:56 +0000 Subject: [PATCH] Fixed Unicode to MARC-8 conversion for certain less preferred UTF-8 sequences. Conversion for first/second half double tilde + first/second half ligature was changed. Conversion should now perform: 6f ef b8 a0 --> eb 6f, 6f ef b8 a1 --> ec 6f, 6f ef b8 a2 --> fa 6f, 6f ef b8 a3 --> fb 6f. --- src/siconv.c | 21 ++++++++++++--------- test/marccol2.u8.2.lst | 2 +- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/siconv.c b/src/siconv.c index d8bc0f0..c2cbb2f 100644 --- a/src/siconv.c +++ b/src/siconv.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2006, Index Data ApS * See the file LICENSE for details. * - * $Id: siconv.c,v 1.30 2006-12-19 22:41:28 adam Exp $ + * $Id: siconv.c,v 1.31 2006-12-21 20:50:56 adam Exp $ */ /** * \file siconv.c @@ -100,6 +100,7 @@ struct yaz_iconv_struct { unsigned long write_marc8_comb_ch[8]; size_t write_marc8_comb_no; + unsigned write_marc8_second_half_char; unsigned long write_marc8_last; const char *write_marc8_page_chr; }; @@ -756,7 +757,7 @@ static size_t flush_combos(yaz_iconv_t cd, char **outbuf, size_t *outbytesleft) { unsigned long y = cd->write_marc8_last; - unsigned char byte, second_half = 0; + unsigned char byte; char out_buf[10]; size_t i, out_no = 0; @@ -783,25 +784,21 @@ static size_t flush_combos(yaz_iconv_t cd, { /* all MARC-8 combined characters are simple bytes */ byte = (unsigned char )(cd->write_marc8_comb_ch[i]); - if (byte == 0xEB) - second_half = 0xEC; - else if (byte == 0xFA) - second_half = 0xFB; - *(*outbuf)++ = byte; (*outbytesleft)--; } memcpy(*outbuf, out_buf, out_no); *outbuf += out_no; (*outbytesleft) -= out_no; - if (second_half) + if (cd->write_marc8_second_half_char) { - *(*outbuf)++ = second_half; + *(*outbuf)++ = cd->write_marc8_second_half_char; (*outbytesleft)--; } cd->write_marc8_last = 0; cd->write_marc8_comb_no = 0; + cd->write_marc8_second_half_char = 0; return 0; } @@ -818,6 +815,11 @@ static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x, if (comb) { + if (x == 0x0361) + cd->write_marc8_second_half_char = 0xEC; + else if (x == 0x0360) + cd->write_marc8_second_half_char = 0xFB; + if (cd->write_marc8_comb_no < 6) cd->write_marc8_comb_ch[cd->write_marc8_comb_no++] = y; } @@ -1078,6 +1080,7 @@ size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft, cd->compose_char = 0; cd->write_marc8_comb_no = 0; + cd->write_marc8_second_half_char = 0; cd->write_marc8_last = 0; cd->write_marc8_page_chr = "\033(B"; diff --git a/test/marccol2.u8.2.lst b/test/marccol2.u8.2.lst index 27319c2..b083089 100644 --- a/test/marccol2.u8.2.lst +++ b/test/marccol2.u8.2.lst @@ -1,4 +1,4 @@ -03093cam a2200337 i 4500 +03091cam a2200337 i 4500 001 12683849 005 20051218154744.0 008 981008b2001 ilu 000 0 eng -- 1.7.10.4