From 4e58607ee6ef2a5d183262b9a87415fed2311c85 Mon Sep 17 00:00:00 2001
From: Adam Dickmeiss
Date: Mon, 15 Dec 2008 15:41:34 +0100
Subject: [PATCH] Added Danmarc character set decoder (bug #2345).

---
 src/Makefile.am            |    2 +-
 src/iconv-p.h              |    2 +
 src/iconv_decode_danmarc.c |  186 ++++++++++++++++++++++++++++++++++++++++++++
 src/siconv.c               |    2 +
 test/tsticonv.c            |   25 ++++++
 win/makefile               |    1 +
 6 files changed, 217 insertions(+), 1 deletion(-)
 create mode 100644 src/iconv_decode_danmarc.c

diff --git a/src/Makefile.am b/src/Makefile.am
index cc8c903..e72c1ee 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -105,7 +105,7 @@ libyaz_la_SOURCES=version.c options.c log.c \
   record_conv.c retrieval.c elementset.c snprintf.c query-charset.c \
   copy_types.c match_glob.c poll.c daemon.c \
   iconv_encode_marc8.c iconv_encode_iso_8859_1.c iconv_encode_wchar.c \
-  iconv_decode_marc8.c iconv_decode_iso5426.c sc.c
+  iconv_decode_marc8.c iconv_decode_iso5426.c iconv_decode_danmarc.c sc.c
 
 libyaz_la_LDFLAGS=-version-info $(YAZ_VERSION_INFO)
 
diff --git a/src/iconv-p.h b/src/iconv-p.h
index f99943b..e5e92e8 100644
--- a/src/iconv-p.h
+++ b/src/iconv-p.h
@@ -103,6 +103,8 @@ yaz_iconv_decoder_t yaz_advancegreek_decoder(const char *name,
                                              yaz_iconv_decoder_t d);
 yaz_iconv_decoder_t yaz_wchar_decoder(const char *fromcode,
                                       yaz_iconv_decoder_t d);
+yaz_iconv_decoder_t yaz_danmarc_decoder(const char *fromcode,
+                                        yaz_iconv_decoder_t d);
 
 #endif
 /*
diff --git a/src/iconv_decode_danmarc.c b/src/iconv_decode_danmarc.c
new file mode 100644
index 0000000..3dc0f6e
--- /dev/null
+++ b/src/iconv_decode_danmarc.c
@@ -0,0 +1,186 @@
+/* This file is part of the YAZ toolkit.
+ * Copyright (C) 1995-2008 Index Data
+ * See the file LICENSE for details.
+ */
+/**
+ * \file
+ * \brief Danmarc2 character set decoding
+ *
+ */
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <assert.h>
+#include <errno.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <yaz/xmalloc.h>
+#include <yaz/nmem.h>
+#include "iconv-p.h"
+
+/** \brief decoder state */
+struct decoder_data {
+    /** character to return on the next read; 0 if none is pending */
+    unsigned long x_back;
+};
+
+/** \brief returns value of hex digit c; -1 if c is not a hex digit */
+static int danmarc_hex_value(unsigned char c)
+{
+    if (c >= '0' && c <= '9')
+        return c - '0';
+    if (c >= 'a' && c <= 'f')
+        return c - 'a' + 10;
+    if (c >= 'A' && c <= 'F')
+        return c - 'A' + 10;
+    return -1;
+}
+
+/**
+ * \brief reads next character from Danmarc encoded input
+ * \param cd conversion handle; gets error code on failure
+ * \param d decoder; d->data is our struct decoder_data
+ * \param inp input buffer
+ * \param inbytesleft number of bytes available in inp
+ * \param no_read set to number of bytes consumed; 0 on error
+ * \returns character value; 0 on error
+ *
+ * Bytes other than '@' are returned as is. '@' starts an escape:
+ * '@' followed by '@', '*' or 0xa4 yields that second byte;
+ * '@' followed by 0xe5 / 0xc5 yields "aa" / "Aa" over two calls;
+ * otherwise '@' must be followed by four hex digits that hold the
+ * character value.
+ */
+static unsigned long read_danmarc(yaz_iconv_t cd,
+                                  yaz_iconv_decoder_t d,
+                                  unsigned char *inp,
+                                  size_t inbytesleft, size_t *no_read)
+{
+    struct decoder_data *data = (struct decoder_data *) d->data;
+    unsigned long x = inp[0];
+
+    if (data->x_back)
+    {
+        /* second half of "aa"/"Aa": uses up the 0xe5/0xc5 byte */
+        *no_read = 1;
+        x = data->x_back;
+        data->x_back = 0;
+        return x;
+    }
+
+    if (x == '@')
+    {
+        if (inbytesleft < 2)
+        {
+            yaz_iconv_set_errno(cd, YAZ_ICONV_EINVAL);
+            *no_read = 0;
+            return 0;
+        }
+        switch(inp[1])
+        {
+        case '@':
+        case '*':
+        case 0xa4: /* CURRENCY SIGN */
+            x = inp[1];
+            *no_read = 2;
+            break;
+        case 0xe5: /* LATIN SMALL LETTER A WITH RING ABOVE */
+            /* use up '@' only; the next call returns x_back */
+            x = 'a';
+            data->x_back = 'a';
+            *no_read = 1;
+            break;
+        case 0xc5: /* LATIN CAPITAL LETTER A WITH RING ABOVE */
+            x = 'A';
+            data->x_back = 'a';
+            *no_read = 1;
+            break;
+        default:
+            if (inbytesleft < 5)
+            {
+                yaz_iconv_set_errno(cd, YAZ_ICONV_EINVAL);
+                *no_read = 0;
+                return 0;
+            }
+            else
+            {
+                /* exactly four hex digits are required. sscanf is not
+                   used: it skips white space, accepts fewer digits
+                   (leaving the result unset when there are none) and
+                   expects NUL-terminated input */
+                unsigned long v = 0;
+                size_t i;
+
+                for (i = 1; i <= 4; i++)
+                {
+                    int h = danmarc_hex_value(inp[i]);
+                    if (h < 0)
+                    {
+                        yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ);
+                        *no_read = 0;
+                        return 0;
+                    }
+                    v = v * 16 + (unsigned long) h;
+                }
+                *no_read = 5;
+                x = v;
+            }
+        }
+    }
+    else
+        *no_read = 1;
+    return x;
+}
+
+
+/** \brief init handler: clears pending character; always returns 0 */
+static size_t init_danmarc(yaz_iconv_t cd, yaz_iconv_decoder_t d,
+                           unsigned char *inp,
+                           size_t inbytesleft, size_t *no_read)
+{
+    struct decoder_data *data = (struct decoder_data *) d->data;
+    data->x_back = 0;
+    return 0;
+}
+
+/** \brief destroy handler: frees decoder state */
+static void destroy_danmarc(yaz_iconv_decoder_t d)
+{
+    struct decoder_data *data = (struct decoder_data *) d->data;
+    xfree(data);
+}
+
+/**
+ * \brief sets up Danmarc decoder if fromcode matches "danmarc"
+ * \param fromcode name of source character set
+ * \param d decoder to be set up
+ * \returns d if fromcode matched; 0 otherwise (d is not modified)
+ */
+yaz_iconv_decoder_t yaz_danmarc_decoder(const char *fromcode,
+                                        yaz_iconv_decoder_t d)
+{
+    if (!yaz_matchstr(fromcode, "danmarc"))
+    {
+        struct decoder_data *data = (struct decoder_data *)
+            xmalloc(sizeof(*data));
+        d->data = data;
+        data->x_back = 0;
+        d->read_handle = read_danmarc;
+        d->init_handle = init_danmarc;
+        d->destroy_handle = destroy_danmarc;
+        return d;
+    }
+    return 0;
+}
+
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
diff --git a/src/siconv.c b/src/siconv.c
index d7f066d..697afe8 100644
--- a/src/siconv.c
+++ b/src/siconv.c
@@ -92,6 +92,8 @@ static int prepare_decoders(yaz_iconv_t cd, const char *tocode)
         return 1;
     if (yaz_wchar_decoder(tocode, &cd->decoder))
         return 1;
+    if (yaz_danmarc_decoder(tocode, &cd->decoder))
+        return 1;
     return 0;
 }
 
diff --git a/test/tsticonv.c b/test/tsticonv.c
index 20424d6..5bc649a 100644
--- a/test/tsticonv.c
+++ b/test/tsticonv.c
@@ -681,6 +681,29 @@ static void tst_utf8_codes(void)
     YAZ_CHECK(utf8_check(100000000));
 }
 
+static void tst_danmarc_to_latin1(void)
+{
+    yaz_iconv_t cd = yaz_iconv_open("iso-8859-1", "danmarc");
+
+    YAZ_CHECK(cd);
+    if (!cd)
+        return;
+
+    YAZ_CHECK(tst_convert(cd, "ax", "ax"));
+
+    YAZ_CHECK(tst_convert(cd, "a@@b", "a@b"));
+    YAZ_CHECK(tst_convert(cd, "a@@@@b", "a@@b"));
+    YAZ_CHECK(tst_convert(cd, "a@*b", "a*b"));
+    YAZ_CHECK(tst_convert(cd, "@000ab", "\nb"));
+    YAZ_CHECK(tst_convert(cd, "@000Ab", "\nb"));
+
+    YAZ_CHECK(tst_convert(cd, "@\xe5", "aa"));
+    YAZ_CHECK(tst_convert(cd, "@\xc5.", "Aa."));
+
+    yaz_iconv_close(cd);
+}
+
+
 int main (int argc, char **argv)
 {
     YAZ_CHECK_INIT(argc, argv);
@@ -700,6 +723,8 @@ int main (int argc, char **argv)
     tst_utf8_to_marc8("marc8lossy");
     tst_utf8_to_marc8("marc8lossless");
 
+    tst_danmarc_to_latin1();
+
     tst_latin1_to_marc8();
 
     tst_marc8_to_ucs4b();
diff --git a/win/makefile b/win/makefile
index c096690..ee78c78 100644
--- a/win/makefile
+++ b/win/makefile
@@ -495,6 +495,7 @@ MISC_OBJS= \
    $(OBJDIR)\iconv_decode_marc8.obj \
    $(OBJDIR)\iconv_encode_wchar.obj \
    $(OBJDIR)\iconv_decode_iso5426.obj \
+   $(OBJDIR)\iconv_decode_danmarc.obj \
    $(OBJDIR)\mutex.obj \
    $(OBJDIR)\sc.obj
 
-- 
1.7.10.4